
APPENDIX N 



COPYRIGHT 1998, LANGUAGE ANALYSIS SYSTEMS, INC. 



NAMEHUNTER 



//
// File: NH_util.cpp
//
// Description:
//
// Implementation of various utility functions used in the SNAPI
//
// History:
//
// 5/15/97 EFB Created
// 3/20/98 EFB Changed names to NH from SN
//



#include 



<string.h> 



include "NH_util.hpp M 
include "NHCompParms.hpp" 



// function to remove leading and trailing spaces from a string 
// in place. 

// Strips the string at either end or both ends. 

// Stripchars specify the characters that should 

// be stripped. We start by seeing if they want the 

// trailing chars stripped, which is easy. We simply 

// work backwards from the end of the string, looking for 

// the first non-strippable character, and terminate the 

// string just past that character. Then if they wanted 

// leading chars stripped, we work forwards to the first 

// non-strippable char, and then move that and each following 

// char to the beginning of the string. 

// Removes leading and trailing whitespace from aString, in place.
//
//   aString - NUL-terminated, writable string; a NULL pointer or an empty
//             string is left untouched.
//
// A character counts as whitespace when it occurs in NH_DEFAULT_WHITESPACE.
// A string made up only of whitespace becomes the empty string.
void NH_strip(char *aString)
{
    char *end_point;
    char *ch;

    if (aString == NULL || *aString == EOS)
        return;                         // nothing to strip

    // Trailing side: start at the last character and walk back to the first
    // non-whitespace character, taking care not to run past the beginning.
    end_point = aString + strlen(aString) - 1;
    while (end_point != aString &&
           strchr(NH_DEFAULT_WHITESPACE, *end_point) != NULL)
        end_point--;

    // If we stopped on the first character and it is whitespace too, the
    // whole string was whitespace: erase it all and we are done.
    if (end_point == aString &&
        strchr(NH_DEFAULT_WHITESPACE, *aString) != NULL) {
        *aString = EOS;
        return;
    }

    *(end_point + 1) = EOS;             // just chop off the excess blanks

    // Leading side: at least one non-whitespace character is left, so this
    // scan stops before it reaches the terminator.
    for (ch = aString; strchr(NH_DEFAULT_WHITESPACE, *ch) != NULL; ch++)
        ;

    // If there were leading blanks, slide the rest of the text (terminator
    // included) down to the start.  The regions overlap, hence memmove.
    if (ch != aString)
        memmove(aString, ch, strlen(ch) + 1);
}



// Searches backwards for searchChar, beginning at searchPos and going no
// further back than stringStart.
//
//   stringStart - first character of the buffer being searched
//   searchPos   - position at which the backwards search begins; it must
//                 point at or after stringStart within the same buffer
//   searchChar  - character to look for
//
// Returns a pointer to the nearest occurrence at or before searchPos, or
// NULL when the character does not occur between stringStart and searchPos.
char * NH_strrchr(char *stringStart, char *searchPos, char searchChar)
{
    while (*searchPos != searchChar) {
        if (searchPos == stringStart)
            return NULL;        // character not found, so return NULL
        searchPos--;
    }

    return searchPos;
}



// 

// File: NH_queens_arrays.hpp 

// 

// Description: 

// 

// Contains global definitions and declarations for the valid 

// combinations of indexes for the best score calculation 

// 
// 

// History: 

// 

// 6/4/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 

typedef unsigned char byte;

// Index-combination tables.
//
// Each table is a flat run of fixed-width rows.  For a table named
// <k>By<n>, a row is k bytes wide and holds k distinct indexes taken from
// 0 .. n-1; the table lists every such ordered selection exactly once.
// The row order is significant only in that it is the order the tables
// have always been published in -- presumably the scoring code walks them
// front to back (TODO confirm against the caller).

byte twoByTwo[] = { 1,0,  0,1 };

byte twoByThree[] = { 1,2,  1,0,
                      2,1,  2,0,
                      0,1,  0,2 };

byte twoByFour[] = { 1,2,  1,3,  1,0,
                     2,1,  2,3,  2,0,
                     3,1,  3,2,  3,0,
                     0,1,  0,2,  0,3 };

byte twoByFive[] = { 1,2,  1,3,  1,4,  1,0,
                     2,1,  2,3,  2,4,  2,0,
                     3,1,  3,2,  3,4,  3,0,
                     4,1,  4,2,  4,3,  4,0,
                     0,1,  0,2,  0,3,  0,4 };

byte threeByThree[] = { 1,2,0,  1,0,2,
                        2,1,0,  2,0,1,
                        0,1,2,  0,2,1 };

byte threeByFour[] = { 1,2,3,  1,2,0,  1,3,2,  1,3,0,  1,0,2,  1,0,3,
                       2,1,3,  2,1,0,  2,3,1,  2,3,0,  2,0,1,  2,0,3,
                       3,1,2,  3,1,0,  3,2,1,  3,2,0,  3,0,1,  3,0,2,
                       0,1,2,  0,1,3,  0,2,1,  0,2,3,  0,3,1,  0,3,2 };

byte threeByFive[] = { 1,2,3,  1,2,4,  1,2,0,  1,3,2,  1,3,4,  1,3,0,
                       1,4,2,  1,4,3,  1,4,0,  1,0,2,  1,0,3,  1,0,4,
                       2,1,3,  2,1,4,  2,1,0,  2,3,1,  2,3,4,  2,3,0,
                       2,4,1,  2,4,3,  2,4,0,  2,0,1,  2,0,3,  2,0,4,
                       3,1,2,  3,1,4,  3,1,0,  3,2,1,  3,2,4,  3,2,0,
                       3,4,1,  3,4,2,  3,4,0,  3,0,1,  3,0,2,  3,0,4,
                       4,1,2,  4,1,3,  4,1,0,  4,2,1,  4,2,3,  4,2,0,
                       4,3,1,  4,3,2,  4,3,0,  4,0,1,  4,0,2,  4,0,3,
                       0,1,2,  0,1,3,  0,1,4,  0,2,1,  0,2,3,  0,2,4,
                       0,3,1,  0,3,2,  0,3,4,  0,4,1,  0,4,2,  0,4,3 };

byte fourByFour[] = { 1,2,3,0,  1,2,0,3,  1,3,0,2,  1,3,2,0,  1,0,2,3,  1,0,3,2,
                      2,1,3,0,  2,1,0,3,  2,3,1,0,  2,3,0,1,  2,0,1,3,  2,0,3,1,
                      3,1,2,0,  3,1,0,2,  3,2,1,0,  3,2,0,1,  3,0,1,2,  3,0,2,1,
                      0,1,2,3,  0,1,3,2,  0,2,1,3,  0,2,3,1,  0,3,1,2,  0,3,2,1 };

byte fourByFive[] = { 1,2,3,4,  1,2,3,0,  1,2,4,3,  1,2,4,0,  1,2,0,3,  1,2,0,4,
                      1,3,2,4,  1,3,2,0,  1,3,4,2,  1,3,4,0,  1,3,0,2,  1,3,0,4,
                      1,4,2,3,  1,4,2,0,  1,4,3,2,  1,4,3,0,  1,4,0,2,  1,4,0,3,
                      1,0,2,3,  1,0,2,4,  1,0,3,2,  1,0,3,4,  1,0,4,2,  1,0,4,3,
                      2,1,3,4,  2,1,3,0,  2,1,4,3,  2,1,4,0,  2,1,0,3,  2,1,0,4,
                      2,3,1,4,  2,3,1,0,  2,3,4,1,  2,3,4,0,  2,3,0,1,  2,3,0,4,
                      2,4,1,3,  2,4,1,0,  2,4,3,1,  2,4,3,0,  2,4,0,1,  2,4,0,3,
                      2,0,1,3,  2,0,1,4,  2,0,3,1,  2,0,3,4,  2,0,4,1,  2,0,4,3,
                      3,2,1,4,  3,2,1,0,  3,2,4,1,  3,2,4,0,  3,2,0,1,  3,2,0,4,
                      3,1,2,4,  3,1,2,0,  3,1,4,2,  3,1,4,0,  3,1,0,2,  3,1,0,4,
                      3,4,2,1,  3,4,2,0,  3,4,1,2,  3,4,1,0,  3,4,0,2,  3,4,0,1,
                      3,0,2,1,  3,0,2,4,  3,0,1,2,  3,0,1,4,  3,0,4,2,  3,0,4,1,
                      4,2,3,1,  4,2,3,0,  4,2,1,3,  4,2,1,0,  4,2,0,3,  4,2,0,1,
                      4,3,2,1,  4,3,2,0,  4,3,1,2,  4,3,1,0,  4,3,0,2,  4,3,0,1,
                      4,1,2,3,  4,1,2,0,  4,1,3,2,  4,1,3,0,  4,1,0,2,  4,1,0,3,
                      4,0,2,3,  4,0,2,1,  4,0,3,2,  4,0,3,1,  4,0,1,2,  4,0,1,3,
                      0,2,3,4,  0,2,3,1,  0,2,4,3,  0,2,4,1,  0,2,1,3,  0,2,1,4,
                      0,3,2,4,  0,3,2,1,  0,3,4,2,  0,3,4,1,  0,3,1,2,  0,3,1,4,
                      0,4,2,3,  0,4,2,1,  0,4,3,2,  0,4,3,1,  0,4,1,2,  0,4,1,3,
                      0,1,2,3,  0,1,2,4,  0,1,3,2,  0,1,3,4,  0,1,4,2,  0,1,4,3 };

// Every row below must be a permutation of 0..4: an index that appears
// twice in a row would pair one element with two partners and leave
// another unpaired.
byte fiveByFive[] = { 1,2,3,4,0,  1,2,3,0,4,  1,2,4,3,0,  1,2,4,0,3,  1,2,0,3,4,  1,2,0,4,3,
                      1,3,2,4,0,  1,3,2,0,4,  1,3,4,2,0,  1,3,4,0,2,  1,3,0,2,4,  1,3,0,4,2,
                      1,4,2,3,0,  1,4,2,0,3,  1,4,3,2,0,  1,4,3,0,2,  1,4,0,2,3,  1,4,0,3,2,
                      1,0,2,3,4,  1,0,2,4,3,  1,0,3,2,4,  1,0,3,4,2,  1,0,4,2,3,  1,0,4,3,2,
                      2,1,3,4,0,  2,1,3,0,4,  2,1,4,3,0,  2,1,4,0,3,  2,1,0,3,4,  2,1,0,4,3,
                      2,3,1,4,0,  2,3,1,0,4,  2,3,4,1,0,  2,3,4,0,1,  2,3,0,1,4,  2,3,0,4,1,
                      2,4,1,3,0,  2,4,1,0,3,  2,4,3,1,0,  2,4,3,0,1,  2,4,0,1,3,  2,4,0,3,1,
                      2,0,1,3,4,  2,0,1,4,3,  2,0,3,1,4,  2,0,3,4,1,  2,0,4,1,3,  2,0,4,3,1,
                      3,2,1,4,0,  3,2,1,0,4,  3,2,4,1,0,  3,2,4,0,1,  3,2,0,1,4,  3,2,0,4,1,
                      3,1,2,4,0,  3,1,2,0,4,  3,1,4,2,0,  3,1,4,0,2,  3,1,0,2,4,  3,1,0,4,2,
                      3,4,2,1,0,  3,4,2,0,1,  3,4,1,2,0,  3,4,1,0,2,  3,4,0,2,1,  3,4,0,1,2,
                      3,0,2,1,4,  3,0,2,4,1,  3,0,1,2,4,  3,0,1,4,2,  3,0,4,2,1,  3,0,4,1,2,
                      4,2,3,1,0,  4,2,3,0,1,  4,2,1,3,0,  4,2,1,0,3,  4,2,0,3,1,  4,2,0,1,3,
                      4,3,2,1,0,  4,3,2,0,1,  4,3,1,2,0,  4,3,1,0,2,  4,3,0,2,1,  4,3,0,1,2,
                      4,1,2,3,0,  4,1,2,0,3,  4,1,3,2,0,  4,1,3,0,2,  4,1,0,2,3,  4,1,0,3,2,
                      4,0,2,3,1,  4,0,2,1,3,  4,0,3,2,1,  4,0,3,1,2,  4,0,1,2,3,  4,0,1,3,2,
                      0,2,3,4,1,  0,2,3,1,4,  0,2,4,3,1,  0,2,4,1,3,  0,2,1,3,4,  0,2,1,4,3,
                      0,3,2,4,1,  0,3,2,1,4,  0,3,4,2,1,  0,3,4,1,2,  0,3,1,2,4,  0,3,1,4,2,
                      0,4,2,3,1,  0,4,2,1,3,  0,4,3,2,1,  0,4,3,1,2,  0,4,1,2,3,  0,4,1,3,2,
                      0,1,2,3,4,  0,1,2,4,3,  0,1,3,2,4,  0,1,3,4,2,  0,1,4,2,3,  0,1,4,3,2 };



// File: NH_getErrorText.cpp
//
// Description:
//
// Implementation to the NH_getErrorText function. This function can
// be used to return the error text for an associated error code.
//
// History:
//
// 6/23/97 EFB Created
// 3/20/98 EFB Changed names to NH from SN
//

#include "NH_get_error_text . h" 
#include <string . h> 



void NH get_error_text (NHReturnCode errorCode, char *textBuf f er , int 

maxChars) 

{ 

char * errorMsgPtr; 

switch (errorCode) { 

case NH_SUCCESS: 

errorMsgPtr = "Operation successful"; 

break; 
case NHJ4ATCH: 

errorMsgPtr - "The comparison matched"; 

break; 
case NH_NO_MATCH: 

errorMsgPtr = "The comparison did not match"; 

break; 

case N H_I N VAL I D_S CORE_T H RE S H : 

errorMsgPtr = "The threshold must be between 0.0 and 



1.0"; 



break; 

case NH_INVALID_GN_INIT_SCORE : 

errorMsgPtr = "The GN initial score must be between 



0.0 and 1.0"; 

break; 

case NH_INVALID_NH_INIT_SCORE : 

errorMsgPtr = "The SN initial score must be between 

0.0 and 1.0"; 

break; 

case NH_INVALID_GN_INIT_ON_INIT_MATCH_SCORE: 

errorMsgPtr = "The GN initial on intial match score 
must be between 0.0 and 1.0"; 

break; 

case NH_INVALID_NH_INIT_ON_INIT_MATCH_SCORE: 

errorMsgPtr = "The SN initial on intial match score 
must be between 0.0 and 1.0"; 

break; 

case NH_INVALID_NFN_SCORE: 

errorMsgPtr = "The NFN score must be between 0.0 and 

1.0"; 



1.0"; 



1.0"; 



1.0"; 



and 1.0"; 



and 1.0"; 



and 1.0"; 



and 1.0"; 



break; 

case N H_I N VAL I D_FN U_S CORE : 

errorMsgPtr = "The FNU score must be between 0.0 and 

break; 

case NH_INVALID_NLN_SCORE : 

errorMsgPtr = "The NLN score must be between 0.0 and 

break; 

case NH_INVALIDJLNU_SCORE : 

errorMsgPtr = "The LNU score must be between 0.0 and 

break; 

case NH_INVALID_GN_ANCHOR_FACTOR : 

errorMsgPtr = "The GN anchor score must be between 0.0 

break; 

case NH_INVALID_NH_ANCHOR_FACTOR: 

errorMsgPtr = "The SN anchor score must be between 0.0 

break; 

case NH_INVALID_GNjDOPS_FACTOR: 

errorMsgPtr = "The GN OOPS factor must be between 0.0 

break; 

case NH_INVALID_NH_OOPS_FACTOR: 

.errorMsgPtr = "The SN OOPS factor must be between 0.0 



break; 

case NH_INVALID_ABS_DEL_GN_TAQ_FACTOR: 

errorMsgPtr = "The Abs delete GN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_INVALID_ABS_DIS_GN_TAQ__FACTOR: 

errorMsgPtr = "The Abs disregard GN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_INVALID_ABS_DEL_NH_TAQ_FACTOR: 

errorMsgPtr « "The Abs delete SN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_I NVAL I D_ABS_DI S_NH_TAQ_ FACTOR : 

errorMsgPtr = "The Abs disregard SN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_INVALID_DEL_GN_TAQ_FACTOR : 

errorMsgPtr - "The delete GN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_I NVAL I D_DI S_GN_T AQ_FACTOR : 

errorMsgPtr = "The disregard GN TAQ factor must be 
between 0.0 and 1.0"; 

break; 

case NH_INVALID_DEL_NH_TAQ_FACTOR: 

errorMsgPtr = "The delete SN TAQ factor must be 

between 0.0 and 1.0"; 

break; 

case NH_INVALI D_DI S_NH_TAQ_FACTOR : 

errorMsgPtr = "The disregard SN TAQ factor must be 
between 0.0 and 1.0"; 

break; 



case NH_INVALID_GN_C0MPRESSED_NAME_SC0RE:' 
; errorMsgPtr = "The GN compressed name score must be 

between 0.0 and 1.0"; 

break; 

case NH_INVALID_NH_CQMPRESSED_NAME_SCORE : 

errorMsgPtr = "The SN compressed name score must be 
between 0.0 and 1.0"; 

break; 

case NH__RESULTS_LIST_INSERT_ALLOC_FAILURE : 

errorMsgPtr = "Could not allocate space for a new 

results list"; 

break; 

case NH_GN_VAR_TABLE_CREATION_ERROR: ■ 

errorMsgPtr = "Problem creating GN variant table"; 
break; 

case NH_NH_VAR_TABLE__CREATION_ERROR: 

errorMsgPtr = "Problem creating SN variant table"; 
break; 

' case NH_TAQ_TABLE_CREATION_ERROR: ' 

errorMsgPtr = "Problem creating TAQ table"; 
* break; 

case NH_SEG_BREAK_CHARS_CREATION_ERROR': 

errorMsgPtr = "Problem creating segment break 
characters strings- 
break; 

case NH_NOISE_CHARS_CREATION_ERROR : 

errorMsgPtr = "Problem creating noise characters 



string" ; 



list"; 



storage' 



invalid" ; 



record" ; 



break; 

case N H_I N VAL I D_RES ULT S_L I S T_S I Z E : 

errorMsgPtr = "Invalid size requested for results 

break; 

case NH_RESULTS_LIST_ALLOCATION_ERROR: 

errorMsgPtr = "Problem creating internal results lis 

break; 

case NH_RESULTS_ARRAY_NULL_ERROR: 

errorMsgPtr = "Internal results list storage is 

break; 

case NH_TAQ_RECORD_ALLOC_ERROR : 

errorMsgPtr = "Problem allocating space for new TAQ 



break; 

case NH_VARIANT_ALLOC_ERROR: 

errorMsgPtr = "Problem allocating space for new 

variant record"; 

break; 

case NH_VARIANTS_DONT_EXIST: 

errorMsgPtr - "The supplied names are not currently 

variants" ; 

break; 

case NH_INVALID_VARIANT_SCORE: 

errorMsgPtr = "Variant scores must be between 0.0 ar 

1.0"; 

break; 

case NH_MAX_VARI ANT_S I ZE_I NCREMENT_FAI LED : 

errorMsgPtr = "Could not increase variant storage tc 
add new variant relationship"; 

break; 



case NH_VARIANT__AL.READY_RELATED: 

errorMsgPtr = "The names are already related to each 

others- 
break; 

case NH_COMP_PARMS_BAD_STREAM_ON_CONSTRUCT : 

errorMsgPtr = "The comp parameters' stream passed to 
the constructor is invalid"; 

breaks- 
case N H_COMP_P ARMS_B AD_S T RE AM_ON_ARC HIVE : 

errorMsgPtr = "The comp parameters stream passed to 
the archiveData method is invalid"; 

breaks- 
case NH__NAME_PARMS_FILE_NOISE_CHARS_ERROR : 

errorMsgPtr = "The noise characters could not be 

read"; 

breaks- 
case NH_NAME_PARMS_FILE_BREAKS_CHARS_ERROR: 

errorMsgPtr = "The break characters could not be 

read"; 

breaks- 
case NH_NAME_PARMS_BAD_STREAM_ON_CONSTRUCT : 

errorMsgPtr = "The Name Parameters stream passed to 
the constructor was bad"; 

break; 

case NH_NAME_PARMS_BAD_STREAM_ON_WRITE : 

errorMsgPtr = "The Name Parameters stream passed to 
the archive method was bad"; 

breaks- 
case NH_NAME_PARMS_FILE_BAD_CULTURE_CODE : 

errorMsgPtr = "The culture code read from the Name 
parameters stream was invalid"; 

breaks- 
case NH_TAQ_NOT__FOUND : 

errorMsgPtr = "The specified TAQ could not be found"; 
breaks- 
case NH_TAQ_ALREADY_EXISTS : 

errorMsgPtr = "The specified TAQ is already defined"; 
break; 

case N H_I N VAL I D_GN_T H RE S H : 

errorMsgPtr = "The GN Threshold must be between 0.0 



and 1.0"; 



and 1.0"; 



1. OS- 



LO"; 



breaks- 
case NH_INVALID_NH_THRESH: 

errorMsgPtr = "The SN Threshold must be between 0.0 

breaks- 
case NH_INVALID_GN_WEIGHT : 

errorMsgPtr = "The GN Weight must be between 0.0 and 

breaks- 
case NH_I N VAL I D_NH_WE I GHT : 

errorMsgPtr = "The SN Weight must be between 0.0 and 



breaks- 
case NH_I N VAL I D_CULTURE_CODE : 

errorMsgPtr = "The specified culture code is invalid" 

breaks- 
case NH_ERROR_READING_CUSTOM_PARAMETER_FROM_FILE : 

errorMsgPtr = "A problem was encounter when reading a 
custom parameter from a file"; 

break; 



' case NH_ERROR_WRITING_CUSTOM_PARAMETER_TO_FILE: 

errorMsgPtr = "A problem was encounter when writing a 
custom parameter to a file"; 

break; 
default: 

errorMsgPtr = "Unknown Error"; 
break; 

} 

strncpy ( textBuf f er , errorMsgPtr, maxChars) ; 
textBuf fer [maxChars j = ECS; 



// File: NH_culture_codes.cpp
//

// Description: 

// 

// Definition of global array of culture code strings 

// 

// 

// History: 
// 

// 9/12/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 



#include <string.h> 

Hnclude "NH_culture_codes . h" 



// The following two global arrays must be the same size. 

// That is, they must have the same number of elements. 

// If you add or remove items, you must also update the 

// constant NH_NUM_CULTURE_CODES 

// In addition, they must maintain the same relative order 

// (for example, Arabic must be in the same position in both 

// arrays) . 

// lastly, this stuff must match the NHParmsType enum type, 

// both in number and relative position. The NH_NUM_PARMS_TYPES 

// must also be kept in sync as well. 

char *NH_culture_codes [] = { NH_CULTURE_CODE_ANGLO, 

NH_CULTURE_CODE_ARABIC, 

NH_CULTURE_CODE_CHINESE, 

NH_CULTURE_CODE_GENERIC , 

NH_CULTURE_CODE__H IS PANIC, 

NH_CULTURE_CODE_KOREAN , 

NH_CULTURE_CODE_RUSSIAN} ; 

// Culture name strings, listed in the same relative order as the entries
// of NH_culture_codes.
char *NH_culture_strings[] = {
    NH_CULTURE_STRING_ANGLO,
    NH_CULTURE_STRING_ARABIC,
    NH_CULTURE_STRING_CHINESE,
    NH_CULTURE_STRING_GENERIC,
    NH_CULTURE_STRING_HISPANIC,
    NH_CULTURE_STRING_KOREAN,
    NH_CULTURE_STRING_RUSSIAN
};

// Reports whether cultureCode is one of the known culture codes.
//
//   cultureCode - code to look up
//
// Returns true when cultureCode equals an entry of NH_culture_codes over
// at most NH_MAX_CULTURE_CODE_LEN characters, false otherwise.
bool NH_validate_culture_code(NHCultureCode cultureCode)
{
    bool found = false;

    for (int i = 0; i < NH_NUM_CULTURE_CODES; i++) {
        if (!strncmp(cultureCode, NH_culture_codes[i],
                     NH_MAX_CULTURE_CODE_LEN)) {
            found = true;
            break;
        }
    }

    return found;
}



// File: namehunter.h 

// 

// Description: 

// shutdown and startup functions for the NameHunter system.
// These are really just blind interfaces to the
// NH_variant_taq_globals functions. We do this because
// we want to hide the details of the variants and TAQs
// from the API user.
//
// History:

9/9/97 • EFB Created 

3/20/98 EFB Changed names to NH from SN 

"namehunter.h" 
"NHVariantTable. hpp" 
"NHTAQTable . hpp" 
"NH_variant_taq_globals . h" 
"NHDigraphBitmapArray.hpp" . 



NHVariantTable *NH_snVariantTable ; 
NHVariantTable +NH_gnVariantTable ; 
NHTAQTable *NH_taqTable ; 



II 
II 

#include 
#include 
#include 
#include 
#include 



extern 
extern 
extern 



NHDigraphBitmapArray globalDigraphBitmapAr ray ; 



void NH_startup() 

1 NH getVariantTable (NH_SURNAME_VARIANTS ) ; 

NH~getVariantTable (NH_GIVENNAME_VARIANTS) ; 
NH_getTAQTable () ; 



void NH_shutdown ( ) 

if (NH_snVariantTable ! = NULL) { 
delete NH_snVariantTable; 
NH_snVariantTable = NULL; 

if (NH_gnVariantTable != NULL) { 
delete NH_gnVariantTable ; 
NH_gnVariantTable = NULL; 

} 

if (NH_taqTable != NULL) { 
delete NH_taqTable; 
NH taqTable = NULL; 



} 



//
// File: NH_getErrorText.cpp
//
// Description:
//
// Implementation to the NH_getErrorText function. This function can
// be used to return the error text for an associated error code.
//
// History:
//
// 6/23/97 EFB Created
// 3/20/98 EFB Changed names to NH from SN
//



#include 



'NHLge^errorJiext.h 1 



//include 



<string.h> 



void NH_get_error_text(NHReturnCode errorCode, char *textBuffer, int maxChars) 



char * errorMsgPtr; 

switch (errorCode) { 

case NH__SUCCESS: 

errorMsgPtr = "Operation successful"; 

break; 
case NH_MATCH: 

errorMsgPtr = "The comparison matched"; 

break; 

case NH_NO_MATCH: 

errorMsgPtr = "The comparison did not match"; 
break; 

case NH_INVALID_SCORE_THRESH: 

errorMsgPtr = "The threshold must be between 0.0 and 1.0"; 
break; 

case NH_INVALID_GN_INIT_SCORE: 

errorMsgPtr = "The GN initial score must be between 0.0 and 1 .0 
break; 

case NH_INVALID_NH_INIT_SCORE: 

errorMsgPtr = "The SN initial score must be between 0.0 and 1.0' 
break; 



{ 



case 



NH INVALID GN INIT ON INIT MATCH SCORE: 



errorMsgPtr - "The GN initial on intial match score must be 

between 0.0 and 1.0"; 

break; 

case NH_INVALID_NH_INIT_ON_INIT_MATCH_SCORE: 

errorMsgPtr = "The SN initial on intial match score must be 

between 0.0 and 1.0"; 

break; 

case NH_INVALID_NFN_SCORE: 

errorMsgPtr = "The NFN score must be between 0.0 and 1 .0"; 
break; 

case NH_INVALIDJFNU_SCORE: 

errorMsgPtr = "The FNU score must be between 0.0 and 1 .0"; 
break; 

case NH_INVALID_NLN_SCORE: 

errorMsgPtr = "The NLN score must be between 0.0 and 1 .0"; 
break; 

case NH_INVALID_LNU_SCORE: 

errorMsgPtr = "The LNU score must be between 0.0 and 1.0"; 
break; 

case NH_INVALID_GN_ANCHOR_FACTOR: 

errorMsgPtr - "The GN anchor score must be between 0.0 and 

1.0"; 

break; 

case NH_INVALID_NH_ANCHOR_FACTOR: 

errorMsgPtr = "The SN anchor score must be between 0.0 and 

1.0"; 

break; 

case NH_INVALID_GN_OOPS_FACTOR: 

errorMsgPtr = "The GN OOPS factor must be between 0.0 and 

1.0"; 

break; 

case NH_INVALID_NH_OOPS_FACTOR: 

errorMsgPtr = "The SN OOPS factor must be between 0.0 and 

1.0"; 

break; 

case NH_INVALID_ABS_DEL_GN_TAQ_F ACTOR: 

errorMsgPtr = "The Abs delete GN TAQ factor must be between 

0.0 and 1.0"; 

break; 

case NH_INVALID_ABS_DIS_GN_TAQ_F ACTOR: 

errorMsgPtr = "The Abs disregard GN TAQ factor must be 

between 0.0 and 1.0"; 

break; 

case NHINVALIDABSDELNHTAQF ACTOR: 



errorMsgPtr = "The Abs delete SN TAQ factor must be between 

0.0 and 1.0"; 

break; 

case NHJNVALID_ABS_DIS_NH_TAQ_F ACTOR: 

errorMsgPtr = "The Abs disregard SN TAQ factor must be 

between 0.0 and 1.0"; 

break; 

case NH_INVALID_DEL_GN_TAQ_F ACTOR: 

errorMsgPtr = "The delete GN TAQ factor must be between 0.0 

and 1.0"; 

break; 

case NH_INVALIDJDIS_GN_TAQ_F ACTOR: 

errorMsgPtr = "The disregard GN TAQ factor must be between 0.0 

and 1.0"; 

break; 

case NH_INVALID_DEL_NH_TAQ_FACTOR: 

errorMsgPtr = "The delete SN TAQ factor must be between 0.0 

and 1.0"; 

break; 

case NH_INVALID_DIS_NH_TAQ_FACTOR: 

errorMsgPtr = "The disregard SN TAQ factor must be between 0.0 

and 1.0"; 

break; 

case NH_INYALID_GN_COMPRESSED_NAME_SCORE: 

errorMsgPtr - "The GN compressed name score must be between 

0.0 and 1.0"; 

break; 

case NHJNVALID_NH__COMPRESSED_NAME_SCORE: 

errorMsgPtr = "The SN compressed name score must be between 

0.0 and 1.0"; 

break; 

case NH_RESULTS_LIST_INSERT_ALLOC_FAILURE: 

errorMsgPtr = "Could not allocate space for a new results list"; 
break; 

case NH_GN_VAR_TABLE_CREATION_ERROR: 

errorMsgPtr = "Problem creating GN variant table"; 
break; 

case NH_NH_VAR_TABLE_CREATION_ERROR: 

errorMsgPtr = "Problem creating SN variant table"; 
break; 

case NH_TAQ_TABLE_CREATION_ERROR: 

errorMsgPtr = "Problem creating TAQ table"; 
break; 

case NH_SEG_BREAK_CHARS_CREATION_ERROR: 

errorMsgPtr = "Problem creating segment break characters string"; 



break; 

case NH_NOISE_CHARS_CREATION_ERROR: 

errorMsgPtr = "Problem creating noise characters string"; 
break; 

case NH_INVALID_RESULTS_LIST_SIZE: 

errorMsgPtr = "Invalid size requested for results list"; 
break; 

case NH_RESULTS_LIST_ALLOCATION_ERROR: 

errorMsgPtr = "Problem creating internal results list storage";, 
break; 

case NH_RESULTS_ARRAY_NULL_ERROR: 

errorMsgPtr = "Internal results list storage is invalid"; 
break; 

case NH_TAQ_RECORD_ALLOC_ERROR: 

errorMsgPtr = "Problem allocating space for new TAQ record"; 
break; 

case NH_VARIANTiALLOC_ERROR: 

errorMsgPtr = "Problem allocating space for new variant record" 
break; 

case NH_VARJANTS_DONT_EXIST: 

errorMsgPtr = "The supplied names are not currently variants"; 
break; 

case NH_INVALID_VARIANT_SCORE: 

errorMsgPtr = "Variant scores must be between 0.0 and 1 .0"; 
break; 

case NH_MAX_VARIANT_SIZE_INCREMENT_F AILED: 

errorMsgPtr = "Could not increase variant storage to add hew 

variant relationship"; 

break; 

case NH_V ARI ANT_ALRE AD Y_REL ATED : 

errorMsgPtr = "The names are already related to each other"; 
break; 

case NH_COMP_PARMS_BAD_STREAM_ON_CONSTRUCT: 
errorMsgPtr = "The comp parameters stream passed to the 
constructor is invalid"; 

break; 

case NH_COMP_PARMS_BAD_STREAM_ON_ARCHIVE: 
errorMsgPtr = "The comp parameters stream passed to the 
archiveData method is invalid"; 

break; 

case NH_NAME_PARMS_FILE_NOISE_CHARS_ERROR: 
errorMsgPtr = "The noise characters could not be read"; 
break; 

case NH_NAME_PARMS_FILE_BREAKS_CHARS_ERROR: 
errorMsgPtr = "The break characters could not be read"; 



break; 

case NH_NAME_PARMS_BAD_STREAM_ON_CONSTRUCT: 
errorMsgPtr = "The Name Parameters stream passed to the 

constructor was bad"; 

break; 

case NH_NAME_PARMS_BAD_STRJEAM_ON_WRITE: 

errorMsgPtr = "The Name Parameters stream passed to the archive 

method was bad"; 

break; 

case NH_NAME_PARMS_FILE_BAD_CULTURE_CODE: 

errorMsgPtr = "The culture code read from the Name parameters 

stream was invalid"; 

break; 

case NH_TAQ_NOT_FOUND: 

errorMsgPtr = "The specified TAQ could not be found"; 
break; 

case NH_TAQ_ALR£ADY_EXISTS : 

errorMsgPtr = "The specified TAQ is already defined"; 
break; 

case NH_INVALID_GN_THRESH: 

errorMsgPtr = "The GN Threshold must be between 0.0 and L0"; 
break; 

case NH_INVALID_NH_THRESH: 

errorMsgPtr = "The SN Threshold must be between 0.0 and 1 .0"; 
break; 

case NH_INVALID_GN_ WEIGHT : 

errorMsgPtr = "The GN Weight must be between 0.0 and 1 .0"; 
break; 

case NH_INVALID_NH_WEIGHT: 

errorMsgPtr = "The SN Weight must be between 0.0 and 1 .0"; 
break; 

case NH_INVALID_CULTURE_CODE: 

errorMsgPtr = "The specified culture code is invalid"; 
break; 

case 

NH_ERROR_READING_CUSTOM_PARAMETER_FROM_FILE: 

errorMsgPtr = "A problem was encounter when reading a custom 
parameter from a file"; 

break; 

case NH_ERROR_WRITING_CUSTOM_PARAMETER_TO_FILE: 
errorMsgPtr = "A problem was encounter when writing a custom 

parameter to a file"; 

break; 

default: 

errorMsgPtr = "Unknown Error"; 



. break; 

} 

strncpy(textBuffer, errorMsgPtr, maxChars); 
textBuffer[maxChars] = EOS; 



//
// File: namehunter.h
//
// Description:
//
// shutdown and startup functions for the NameHunter system.
// These are really just blind interfaces to the
// NH_variant_taq_globals functions. We do this because
// we want to hide the details of the variants and TAQs
// from the API user.
//
// History:
//
// 9/9/97  EFB Created
// 3/20/98 EFB Changed names to NH from SN
//



#include "namehunter.h"
#include "NHVariantTable.hpp"
#include "NHTAQTable.hpp"
#include "NH_variant_taq_globals.h"
#include "NHDigraphBitmapArray.hpp"

// Table pointers defined in another translation unit; NH_shutdown()
// deletes the objects they point to.
extern NHVariantTable *NH_snVariantTable;
extern NHVariantTable *NH_gnVariantTable;
extern NHTAQTable     *NH_taqTable;

NHDigraphBitmapArray globalDigraphBitmapArray;



// Start-up hook for the NameHunter system: asks for the surname variant
// table, then the given-name variant table, then the TAQ table.  The
// values handed back by the accessors are not used here.
void NH_startup()
{
    // variant tables first, surname before given name
    (void) NH_getVariantTable(NH_SURNAME_VARIANTS);
    (void) NH_getVariantTable(NH_GIVENNAME_VARIANTS);

    // then the TAQ table
    (void) NH_getTAQTable();
}



void 

{ 



NH_shutdown() 

if (NH_snVariantTable != NULL) { 
delete NHsnVariantTable; 
NH_snVariantTable = NULL; 

} 



if (NH_gnVariantTable !=NULL). { 
delete NH_gnVariantTable; 
NH_gnVariantTable = NULL; 

} 

if(NH_taqTable!=NULL) { 
delete NH_taqTable; 
NHjaqTable = NULL; 



// File: NHVariantTable.hpp
//
// Description:
//
// Interface to the NHVariantTable class.
//
// History:
//
// 5/7/97  EFB Created
// 6/23/97 EFB Changed processing to get rid of variant types
//             and assign an individual score for each variant pair.
// 6/23/97 EFB Enhanced comments.
// 9/9/97  EFB Added support for a culture code in the variant object,
//             which required changes to this object's interaction
//             with the NHVariant class.
// 3/20/98 EFB Changed names to NH from SN
//



/*
   Variant information consists of two names that are related, along
   with a designation of variant type, which describes how the two
   names are related.

   The following holds true in our model:

      if Name A is related to name B with varType V, then B is
      related to A with varType V.

   When constructing the table,
   only one of the pairs (A, B) or (B, A) should be entered.  The
   internals will ensure that a request of "is B related to A" and
   a request of "is A related to B" will work.

   Name variants are single segments.

   Internally, we represent the information as a hash table of
   NH_VarHashTableRecord structures.  Each of these structures
   contains a name string, plus a Variant object.
   Each Variant object (a separate class) has the following:

      NHVarId   id;                                 // unique id for each variant
      byte      numRelatedVariants;                 // number of other variants we are related to
      NHVarId   variants[MAX_VARIANTS_PER_NAME]     // array of id's
      double    varScores[MAX_VARIANTS_PER_NAME]    // score for each variant
                                                    // as related to this variant
      short int varCultures[MAX_VARIANTS_PER_NAME]  // score for each variant
                                                    // as related to this variant

   The name of the variant is actually stored in the hash table node,
   rather than the variant object.

   There are three important functions in the VariantTable class:

      bool addVariant(char *name1, char *name2,
                      NHVarType varType, char *cultCode);
      NHVariant getVariantObjectName(char *name);
      NHVarId getVariantIdForName(char *name);

   // The Variant has the method:

      double getVariantScoreForIdAndCulture(NHVarId varId,
                                            char *cultureCode);

   The variant table is built by multiple calls to addVariant() from the
   constructor.  There is one call to addVariant() for each pair of names
   that are related.

   addVariant() takes 2 names that are related, along with a culture
   code to describe the relationship.

   getVariantInfoForName returns the NHVariant object associated with the
   name (or NULL).

   getVariantIdForName() returns the id associated with the name.
   Typically, a QueryNameData object gets a pointer to its variant object
   up front.  Each time it gets compared to an EvalNameData object, it
   calls the getVariantIdForName() method to get an id, which it then passes
   to the getVariantScoreForId() to see if the two are related.
*/



# i f nde f NHVARI ANTTABLE_H P P 

#define NHVARI ANTTABLE HPP 



#include "NHVariant . hpp" 
#include u NH_get_error_text . h M 



// define a const for end of string 
#ifndef EOS 



#def irte 
iendif 



EOS ' \0' 



// how long can a variant be ? 

#ciefine NH_MAX_VARIANT_LEN 30 



// define a type to specify the type of variant table 

// types are defined by a combination of culture and 

// name field, 

enum NH_VARIANT_TABLE_TYPES 

{ 

NH_SURNAME_VARI ANTS , 
NH_GI VENNAME_VARI ANTS , 
NH_EMPTY_VARIANTS 

}; 



// define a record in the Variant hash table
typedef struct NH_VAR_HASH_TABLE_RECORD_T {
    char        segment[NH_MAX_VARIANT_LEN + 1];    // the name this record is keyed on
    NHVariant  *variant;                            // variant object for that name
    struct NH_VAR_HASH_TABLE_RECORD_T *next;        // pointer to next node in hash chain
} NH_VarHashTableRecord;

// Do not change without seeing member function hash().
#define NH_MAX_VAR_HASH_TABLE_NODES 907

// define a type that is a pointer to a NH_VarHashTableRecord
typedef NH_VarHashTableRecord *NH_VarHashTableRecordPtr;

// define a type that is a table (array) of NH_VarHashTableRecordPtr,
// one chain head per hash bucket
typedef NH_VarHashTableRecordPtr NH_VariantHashTable[NH_MAX_VAR_HASH_TABLE_NODES];



class NHVariantTable 
{ 

public : 

NHVariantTable (NH_VARIANT_TABLE_TYPES tableType) ; 
virtual -NHVariantTable () ; 



// returns the NHVariant object associated with the name, 
// or NULL is there is no object for the name. 
NHVariant * getVariantObj ect ForName (char *name) ; 

// returns the NHVarld associated with the name. If 

there is 

// no variant for the name, the function returns 
NH_VAR_NOT_FOUND . 

NHVarld getVariantldForName (char *name) ; 

NHReturnCode getStatusO (return 

status; } 



NHReturnCode addVar iant ( char *namel, 

char *name2, double varScore, char *cultCode) ; 

int getNumHashBuckets { ) (return 

NH_MAX_VAR_HASH_tABLE_NODES ; } 

NH VarHashTableRecordPtr getHashBucketStartNodeAt (int 

hashTablelndex) 

{return variantHashTable [hashTablelndex] ; } 

// function to change the score associated with two 

variants with a 

// specified culture. 

// The function return: 
// 

// NH_SUCCESS - if things worked out OK 

// NH_VARIANTS_DONT_EXIST - if the either name does 

not exist in the table 

" • 

or the names are not already variants of 

each 

// 

other with the specified culture. 
// NH_INVALID_VARIANT_SCORE - if the score is 

invalid 

NHReturnCode changeVar iantScore (char *namel, char 

*name2, char *cultureCode , double newScore) ; 

// a function to remove the relationship between two 

variants within 

// a specified culture. 

// This function is used for the VariantManager 

application. 

// If either variant ends up without a relationship after 

this 

// operation, it is left in, but when saved, the 
resulting file 

// will contain a rather than a related name. The 

function can 

// return 
// 

// NH_SUCCESS - if things worked out OK 

// NH_VARI ANT S_DONT_EX 1ST - if the names are not 

already variants 

NHReturnCode removeVariantRelat ion (char *namel, char 

*name2, char *cultureCode) ; 

// return the next available id, which is the number of 
// distinct variants in our table. 
NHVarld getNextAvailableVarld { ) { return 

nextAvailableVarld; } 

bool getDirtyU {return dirty;} 

void setDirty (bool aBool) {dirty = aBool;} 

protected: 

// add a variant relationship. 

virtual NHVariant * getOrCreateVariantOb j ect ForNam 



e (char *name) ; 

NHVarld nextAvailableVarld; 

NH_VariantHashTable variantHashTable; 
NHReturnCode status; // are we 



valid 



bool dirty; // have we changed 



// Returns an integer in the range [0, 
NH_MAX_VAR_HASH_TABLE_NODES ] . 

inline unsigned int NHVariantTable :: hash (char ^string) 
. { 

char *p; 
unsigned int i; 
unsigned int sum; 



2) 

} 

private: 

}; 



for {p = string, i = 2, sum = 0; *p ! = EOS; p++, i + = 

sum += i * *p; 
return sum % NH_MAX_VAR_HASH_TABLE_NODES; 
// hash 



#endif 



// File: NHVariantTable . cpp • 

// 1 

// Description: 

// 

// Implementation to the NHVariantTable class. 

// 

// 

// History: 
// 

// 5/14/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 



iinclude <string.h> 
Iinclude <stdio.h> 



iinclude "NHVariantTable . hpp" 
#include "NH_util . hpp" 
ttinclude "NH culture codes. h" 



// Builds an empty variant table: status NH_SUCCESS, not dirty, every hash
// bucket empty, and ids starting at 0. The per-type data includes below are
// commented out, so tableType currently selects nothing.
NHVariantTable::NHVariantTable(NH_VARIANT_TABLE_TYPES tableType)
{
    status = NH_SUCCESS;
    dirty = false;

    // clear out the hash table
    for (int i = 0; i < NH_MAX_VAR_HASH_TABLE_NODES; i++)
        variantHashTable[i] = NULL;

    // initialize our variant id variable.
    nextAvailableVarId = 0;

    /* gnv test stuff
    addVariant("ED", "EDWARD", 0.7, "E ");
    addVariant("GERRY", "GENERIC", 0.7, "G ");
    addVariant("HOP", "HOPSING", 0.7, "C ");
    addVariant("NASSIR", "NARADMAN", 0.7, "A ");
    addVariant("BORRIS", "NATASIA", 0.7, "R");
    addVariant("JUAN", "EPSTEIN", 0.7, "H ");
    addVariant("KORY", "KOREAN", 0.7, "H ");

       snv test stuff
    addVariant("HUANG", "WONG", 0.7, "C ");
    */

    // the following include lines are commented out because it takes forever
    // to compile release versions when they are left in.
    if (tableType == NH_GIVENNAME_VARIANTS) {
        // #include "gnvdata.h"
    }
    else if (tableType == NH_SURNAME_VARIANTS) {
        // #include "snvdata.h"
    }
}



// release* all the memory used to store NH_VarHashTableRecord 
pointers 

NHVariantTable : : -NHVariantTable ( ) 
{ 

NH_VarHashTableRecordPtr prevRecord; 
NH_VarHashTableRecordPtr varRecord; 

unsigned int tablelndex; 

for (tablelndex = 0; tablelndex < NH_MAX_VAR_HASH_TABLE_NODES ; 
tablelndex++) { 

varRecord = variantHashTable [tablelndex] ; 
while (varRecord != NULL) { 
prevRecord = varRecord; 
varRecord = varRecord->next ; " 
// delete the record we allocated, 
// as well as the " SNVariant object pointed to by 

the 

// variant member of .this record 
delete prevRecord->variant ; 
delete prevRecord; 

} 

} 

} 



// returns the NHVariant object associated with the name,
// or NULL if there is no object for the name (or name itself is NULL).
// The stored names are compared with strcmp(), so the match is exact
// and case sensitive.
NHVariant * NHVariantTable::getVariantObjectForName(char *name)
{
    NHVariant *variantObject = NULL;
    unsigned int tableIndex;
    NH_VarHashTableRecordPtr tempRecordPtr;

    // hash() dereferences the name, so a NULL name can never be found
    if (name == NULL)
        return NULL;

    // find the hash value for the (possible) variant
    tableIndex = hash(name);

    // go through the records in the chain at that offset in the
    // hash table, and try to find the variant we are looking for.
    tempRecordPtr = variantHashTable[tableIndex];
    while (tempRecordPtr != NULL) {
        if (!strcmp(tempRecordPtr->segment, name)) {
            variantObject = tempRecordPtr->variant;
            break;
        }
        else    // move on to next record in the chain
            tempRecordPtr = tempRecordPtr->next;
    }

    return variantObject;
}



// returns the NHVariant object associated with the name,
// or creates a new one.
//
// A new object gets the next available variant id and is appended to the end
// of the hash chain for the name. The name is truncated to NH_MAX_VARIANT_LEN
// characters when it is stored. If either allocation fails, nothing is added,
// the id is not consumed, status is set to NH_VARIANT_ALLOC_ERROR and NULL is
// returned.
NHVariant * NHVariantTable::getOrCreateVariantObjectForName(char *name)
{
    NHVariant *variantObject = getVariantObjectForName(name);

    if (variantObject == NULL) {
        // no object existed before, so create one and add it
        // to the hash table.
        unsigned int tableIndex;
        NH_VarHashTableRecordPtr prevRecord;
        NH_VarHashTableRecordPtr newVariantHashTableRecord =
            new NH_VarHashTableRecord;

        // only build the variant once we know we have a record to hang it on.
        // (The NULL tests matter for compilers whose operator new returns
        // NULL on failure instead of throwing.)
        if (newVariantHashTableRecord != NULL)
            variantObject = new NHVariant(nextAvailableVarId);

        if (variantObject != NULL) {
            // the id is now in use
            nextAvailableVarId++;

            // find the hash value for the name
            tableIndex = hash(name);

            // fill up the values in the record
            strncpy(newVariantHashTableRecord->segment, name,
                    NH_MAX_VARIANT_LEN);
            // strncpy does not terminate when the name is too long
            newVariantHashTableRecord->segment[NH_MAX_VARIANT_LEN] = EOS;
            newVariantHashTableRecord->variant = variantObject;
            newVariantHashTableRecord->next = NULL;

            // now add the new record to the chain of entries
            // at that index.
            prevRecord = variantHashTable[tableIndex];
            if (prevRecord == NULL)
                variantHashTable[tableIndex] = newVariantHashTableRecord;
            else {
                while (prevRecord->next != NULL) {
                    prevRecord = prevRecord->next;
                }
                prevRecord->next = newVariantHashTableRecord;
            }
        }
        else {
            // do not leak the record when the variant could not be created
            // (delete on a NULL pointer is a no-op)
            delete newVariantHashTableRecord;
            status = NH_VARIANT_ALLOC_ERROR;
        }
    }

    return variantObject;
}



// returns the NHVarld associated with the name. If there is 
// no variant for the name, the function returns NH_VAR_N0T_FOUND . 
NHVarld NHVariantTable : : getVariant IdForName ( char *name) 

{ 

NHVariant * variantOb j ect = getVariantOb j ect ForName ( name ) ; 

NHVarld returnld; 

if (variantObject != NULL) { 

returnld = variantOb j ect->getVariant Id () ; 

} 

else 

returnld = NH VAR NOT_FOUND; 



return returnld; 

} 



// Add a variant relationship. 
// In order to do this, we must: 
// 

// - make sure both names already have entries in the hash 

table 

// and if not, create them. 

// - get the id of each entry. 

// - add the id of each item to the variant information of 

the other. 

// 

// We handle the special case where the second name is a *. This 
means 

// that the name should be part of the variant table, but not related 
// to anything. In this case, 

// we only create (or get) a NHVariant object for the name. 
NHReturnCode NHVariantTable : : addVariant ( char *namel, char *name2, 

double varScore, 



char ^cultureCode) 



{ 



NHReturnCode rc = NH_SUCCESS; 

NHVariant * varOb j ect 1 ; 

NHVariant * varOb j ect 2 ; 



if ((varScore < 0.0) II (varScore > 1.0)) 

rc = NH_INVALID_VARIANT_SCORE; 
else { 

if (NH validate culture_code ( cultureCode ) ) { 



also create 
already 

the second 



// Get variant object for both names. This will 
// a new entry if the name(s) were not in the table 
varObjectl = getOrCreateVariantOb j ect ForName (namel } ; 
// if the second name was a *, skip the creation of 



// NHVariant object and do not associate the names, 
if (strcmp(name2, "*")) { 
varObject2 = 
getOrCreateVariantObjectForName (name2) ; 

if ((varObjectl != NULL) && (varObject2 != 

NULL) ) { 

// now associate each with the other, 

using the supplied variant type 

rc = varObjectl->addVariant (varObject2, 

cultureCode, varScore) ; 

if (rc ™ NH_SUCCESS) 
rc = varObject2- 
>addVariant (varObjectl, cultureCode, varScore) ; 

} 

> 

} 

else { 

// flag it as an error, but do not mark the entire 

table as bad 

rc = NH_INVALID_CULTURE_CODE; 

} 



return rc; 

} 



// function to change the score associated with two variants. 

// The function return: 

// 

// NH SUCCESS - if things worked out OK 

// NH~VARIANTS_DONT_EXIST - if the either name does not exist 

in the table 

// 

or the names are not already variants of each 

// 

other 

// NH_INVALID_VARIANT_SCORE - if the score is invalid 

NHReturnCode " NHVariantTable :: changeVariantScore ( char *namel, char 
*name2, char *cultureCode, double newScore) 

{ - 

NHReturnCode rc = NH_SUCCESS; 

if ((newScore < 0.0) 11 (newScore > 1.0)). 

rc =. NH_INVALID_VARIANT__SCORE; 
else { 

NHVariant *varl = getVariantOb j ectForName {namel ) ; 
NHVariant *var2 = getVariantOb j ectForName (name2 ) ; 

if ( (varl == NULL) I I (var2 == NULL) ) 

rc - NH_VARIANTS_DONT_EXIST; 
else { 

rc = varl->setVariantScoreForIdAndCulture (var2- 

>getVariantId() , cultureCode, newScore) ; 

if ( rc ==* NH_SUCCESS) 

rc = var2->setVariantScoreForIdAndCulture (varl- 
>getVariantId ( ) , cultureCode, newScore) ; 

// we should never have a case where the 

items are related 

// in one direction but not the other. 

} 

} 

return rc; 

} 



// a function to remove the relationship between two variants. 

// If either variant ends up without a relationship after this 

// operation, it is left in, but when saved, the resulting file 

// will contain a "* H rather than a related name. The function can 

// return 

// 

// NH_SUCCESS - if things worked out OK 

// nh VARIANT S_DONT_EXI ST - if the names are not already 

variants 

NHReturnCode NHVariantTable: : removeVariantRelation (char + namel, 

char *name2, char *cultureCode) 

{ 

NHReturnCode rc = NH_VARIANTS_DONT_EXIST; 

NHVariant + varl = getVariantObject ForName ( namel ) ; 



NHVariaht *var2 = getVariantdbjectForName (name2) ; 

if ((varl == NULL) || (var2 == NULL) ) 

rc = NH_VARIANTS_DONT_EXIST; 
else { 

if ( varl->removeVariant ( var2->getVariant Id ( ) , cultureCode) 
— NH_SUCCESS) { 

// we should never have a case where the items are 

related 

// in one direction but not the other, 
if ( var2->removeVariant ( varl ->get Variant Id ( ) , 
cultureCode) == NH_SUCCESS) 

rc = NH_SUCCESS; 

}' 

} 

return rc; 

} 



// File: NHVariant . hpp 
// 

// Description: 
// 

// Interface to the NHVariant class. 

// 

// 

// History: 
// 

// 6/6/97 EFB Created 

// 6/23/97 EFB Changed processing to get rid of variant types
//             and assign an individual score for each variant pair.
// 9/9/97  EFB Changed object so that each relationship has an
//             associated culture. Several access methods have
//             been changed to allow for a culture specifier.

// 3/20/98 EFB Changed names to NH from SN 

// 

/*
 Variant represents the variant information for one name.
 Currently, the name must be a single segment.
 The object contains the following information:

     NHVarId   id;                                   // unique id for this variant
     byte      numRelatedVariants;                   // how many variants are we related to?
     NHVarId   variantIds[MAX_VARIANTS_PER_NAME];    // what are the id's of our related variants
     double    varScores[MAX_VARIANTS_PER_NAME];     // Score for each variant
                                                     // in variants array above
     short int varCultures[MAX_VARIANTS_PER_NAME];   // Two byte code describing the culture
                                                     // for this variant relationship. These are
                                                     // actually char[2] codes.

 A variant knows how to add an id, type combination to its
 information.
*/

# i f nde f NHVARI ANT_HPP 

tdefine NHVARIANT HPP 



# include <stdlib. h> 



ttinclude "NH_get_error_text . h" 
tinclude "^H culture codes. h" 



typedef unsigned char byte; 



// #define MAX_VARIANTS_PER_NAME 30 

#define NH INIT_VARIANTS_PER_NAME 5 



// define a constant to represent that two variants were 
// not related. 

#define NH_VARIANTS_NOT_RELATED -1.0 

// define a variant id as a short int. 
typedef short int NHVarld; 

#define NH_VAR_NOT_FOUND -1 

// define a structure to hold the info about a related variant 

We 

// will use arrays of this structure to list the names related to 
// a variant. 

typedef struct NH_RELATED_VARIANTS_T { 

NHVarld variantld; // what is the id of our 

related variant 

double ' varScore; // Score for this 

variant, as related to the main variant 

// in variants array above 

char varCulture [NH_MAX_CULTURE__CODE_LEN] ; 

// Two byte code describing the culture 

// for this variant relationship. These are 

// actually char[2] codes. 
} NH RelatedVariants; 



class NHVariant 
{ 

public: 

NHVariant {NHVarld newld) ; 
virtual -NHVariant ( ) ; 



// Returns the variant score for the relationship between 

the 

// the supplied variant id and the variant, within the 

specified 

// culture. If the variants are not related, the 
function returns 



// NH_VARIANTS_NOT__RELATED. 

double getVariantScoreForldAndCulture (NHVarld relatedVarld, 
char *cultCode) ; 

// allows caller to search for across cultures within 

this variant 

double getVariantScoreForldAndAnyCulture (NHVarld 

relatedVarld, char *cultCode) ; 

// see if the supplied variant is related to us, and if 

so, 

// replace the existing score with the new score. 
// if not, return NH_VARIANTS_DONT_EXIST . 
NHReturnCode setVariantScoreForldAndCulture (NHVarld 
relatedVarld, 

char *cultCode, double score); 

// adds" the id of the specified variant (along with an 

associated 

// score and culture code) to our array of variants 
related to us . 

virtual NHReturnCode addVariant (NHVariant ^variant, 

char * cultureCode, 

double relatedVarScore) ; 
// remove a variant from our list 

// return NH_VARIANTS_DONT_EXIST if the id is not in our 

list already 

virtual NHReturnCode removeVariant (NHVarld relatedVarld, 
char *cultureCode) ; 

// return the variant id for this object 
NHVarld getVariant Id ( ) {return id;} 

// return the variant id for this object 

byte getNumVariants ( ) {return numRelatedVariants; } 

NHVarld getldForRelatedVariant (int relVarlndex) 

{ 

NHVarld varld = 0; 

if ((relVarlndex > -1) && (relVarlndex < 
numRelatedVariants) ) 

varld = relatedVariants [relVarlndex] . variantld; 
return varld; 

} 

char * getCultureCodeForRelatedVariant (int relVarlndex) 

{ 

char *cultureCode = NULL; 

if ({relVarlndex > -1) && (relVarlndex < 
numRelatedVariants) ) 

cultureCode = 
relatedVariants [relVarlndex] . varCulture; 

return cultureCode; 



double 
{ 



getScoreForRelatedVariant (int relVarlndex) 
double score = 0.0; 



if ((relVarlndex > -1) && (relVarlndex < 
numRelatedVariants) ) 

score = relatedVariants [relVarlndex] .varScore; 
return score; 

} 



protected: 

NHVarld id; 

// unique id 

for this variant 

byte numRelatedVa 

riants; // how many variants are we related to? 

byte maxRelatedVa 

riants; // how many variants are we related to? 

NH RelatedVariants *relatedVariants ; 



private : 



>; 



#endif 



// File:'. .NHVariant . cpp 
// 

// Description: 
// 

// implementation to the NHVariant class. 

// 
// 

// History: 
// 

// 6/6/97      EFB     Created
// 3/20/98     EFB     Changed names to NH from SN
//



#include <string.h> 
#include <stdio.h> 



#include "NHVariant . hpp fl 
tinclude "NH util.hpp" 



#ifndef false 

#define false 0 
#endif 

#ifndef true 

#define true 1 
iendif 



// Creates a variant with the given id and no relations yet, with room for
// NH_INIT_VARIANTS_PER_NAME related variants.
NHVariant::NHVariant(NHVarId newId)
{
    id = newId;
    numRelatedVariants = 0;
    maxRelatedVariants = NH_INIT_VARIANTS_PER_NAME;
    relatedVariants = new NH_RelatedVariants[maxRelatedVariants];
}



// Releases the array of related variants.
NHVariant::~NHVariant()
{
    // delete [] on a NULL pointer is a no-op, so no guard is needed
    delete [] relatedVariants;
}



// see if the supplied variant is related to us, and if so, ret 
its score. 

double NHVariant : : getVariantScoreFor IdAndCulture (NHVarld 

relatedVarld, char *cultCode) 

{ 

double returnScore = NH_VARIANTS_NOT_RELATED; 

for (int i = 0; i < numRelatedVariants ; i++) { 

if ( (relatedVariants [i] .variantld == relatedVarld) && 
(memcmp( relatedVariants [i] . varCulture, cultCode, 
NH_MAX_CULTURE_CODE_LEN) ==0)) { 

returnScore = relatedVariants [ i ]. varScore ; 



break; 

} 

} 

return returnScore; 

} 

// See if the supplied variant is related to us under any culture. 
// Because this method is intended to be called several times (for 
// possibly multiple cultures, it also takes a culture string that 
// is used to keep track of the last culture that was returned. The 
// first time the function is called, the culture is specified as an 
// empty string. On return, it contains the first culture found 
// in the list for the id. The next time the function is called, 
// we look past that culture/id combination in the array looking for 
// the next one, until we retura NH_VARIANTS_NOT_RELATED: 
double NHVariant : : getVariantScoreForldAndAnyCulture (NHVarld 

relatedVarld, char * cult Code) 

{ " . 

double returnScore = NH_VARIANTS_NOT_RELATED; 

bool alreadyFoundLastCultCode = false; 

for (int i = 0; i < numRelatedVariants ; i++) { 

if ( (relatedVariants [i] . variantld == relatedVarld) ) { 

// ids matched, so see if they specified a culture 

code 

if (* cult Code == EOS) { 

// this is first time through, so no check i: 

necessary. 

// copy the cult code into the supplied 

string . 

NH_safe_strcpy {cultCode, 
relatedVariants [i] .varCulture, NH_MAX_CULTURE_CODE__LEN ) ; 

returnScore = relatedVariants [i] . varScore; 
break; 

) 

else { 

// this is not first time through, they are 

passing us the cult code 

// that was found last time, so see if we 
have already found that one 

if (alreadyFoundLastCultCode == true) { 
NH_saf e_strcpy (cultCode, 
relatedVariants [i] .varCulture, NH_MAX_CULTURE_CODE_LEN) ; 

returnScore = relatedVariants [ i ]. varScore 
break; 

} 

else { 

// see if this is the cult code they 

passed us 

if (memcmp (relatedVariants [i] .varCulture, 
cultCode, NH_MAX_CULTURE_CODE_LEN ) ==0) { 

alreadyFoundLastCultCode = 

true; // we found it 

} 

} 

} 

). 

} 

return returnScore; 



// see if the supplied variant is related to us, and if so,
// replace the existing score with the new score.
// if not, return NH_VARIANTS_DONT_EXIST.
// The score itself is not range checked here.
NHReturnCode NHVariant::setVariantScoreForIdAndCulture(NHVarId relatedVarId,
                                                       char *cultCode, double score)
{
    NHReturnCode rc = NH_VARIANTS_DONT_EXIST;

    for (int i = 0; i < numRelatedVariants; i++) {
        if ((relatedVariants[i].variantId == relatedVarId) &&
            (memcmp(relatedVariants[i].varCulture, cultCode,
                    NH_MAX_CULTURE_CODE_LEN) == 0)) {
            relatedVariants[i].varScore = score;
            rc = NH_SUCCESS;
            break;
        }
    }

    return rc;
}

// add a variant to our list
// if the variant is already in the list, do not add it a second
// time, and return an error
//
// Returns:
//      NH_SUCCESS                           - the relation was added
//      NH_VARIANT_ALREADY_RELATED           - this id/culture pair is already listed
//      NH_MAX_VARIANT_SIZE_INCREMENT_FAILED - the list is full and could not grow
//
// The list doubles in size when it fills up. Both counters are single bytes,
// so the capacity is capped at 255 entries; without the cap the doubled
// capacity would wrap around and numRelatedVariants could roll over to 0.
NHReturnCode NHVariant::addVariant(NHVariant *variant, char *cultureCode,
                                   double relatedVarScore)
{
    NHReturnCode rc = NH_SUCCESS;
    NHVarId relatedVarId = variant->getVariantId();

    // check to see if the relationship has already been
    // defined for this id/culture.
    for (int i = 0; i < numRelatedVariants; i++) {
        if ((relatedVariants[i].variantId == relatedVarId) &&
            (memcmp(relatedVariants[i].varCulture,
                    cultureCode, NH_MAX_CULTURE_CODE_LEN) == 0)) {
            rc = NH_VARIANT_ALREADY_RELATED;
            break;
        }
    }

    if (rc == NH_SUCCESS) {
        // see if we are maxed out
        if (numRelatedVariants == maxRelatedVariants) {
            // try to reallocate the space
            NH_RelatedVariants *biggerBlock = NULL;
            unsigned int newMax = (unsigned int) maxRelatedVariants * 2;

            // keep the capacity representable in a byte
            if (newMax > 255)
                newMax = 255;

            // only allocate if that actually gives us more room
            if (newMax > maxRelatedVariants)
                biggerBlock = new NH_RelatedVariants[newMax];

            if (biggerBlock) {
                memcpy(biggerBlock, relatedVariants,
                       sizeof(NH_RelatedVariants) * maxRelatedVariants);
                delete [] relatedVariants;
                relatedVariants = biggerBlock;
                maxRelatedVariants = (byte) newMax;
            }
            else
                rc = NH_MAX_VARIANT_SIZE_INCREMENT_FAILED;
        }
    }

    if (rc == NH_SUCCESS) {
        relatedVariants[numRelatedVariants].variantId = relatedVarId;
        relatedVariants[numRelatedVariants].varScore = relatedVarScore;
        // fixed-width culture field: strncpy pads short codes with zero
        // bytes and adds no terminator when the code fills the field
        strncpy(relatedVariants[numRelatedVariants].varCulture,
                cultureCode, NH_MAX_CULTURE_CODE_LEN);
        numRelatedVariants++;
    }

    return rc;
}



// remove a variant from our list
// return NH_VARIANTS_DONT_EXIST if the id is not in our list already
// The entries after the removed one are moved up one place, so the order of
// the remaining entries is kept. The array is not shrunk.
NHReturnCode NHVariant::removeVariant(NHVarId relatedVarId,
                                      char *cultureCode)
{
    NHReturnCode rc = NH_VARIANTS_DONT_EXIST;

    for (int i = 0; i < numRelatedVariants; i++) {
        if ((relatedVariants[i].variantId == relatedVarId) &&
            (memcmp(relatedVariants[i].varCulture,
                    cultureCode, NH_MAX_CULTURE_CODE_LEN) == 0)) {
            // now move any entries past the one that matched
            // back one space.
            for (int j = i + 1; j < numRelatedVariants; j++) {
                relatedVariants[j - 1].varScore =
                    relatedVariants[j].varScore;
                relatedVariants[j - 1].variantId =
                    relatedVariants[j].variantId;
                strncpy(relatedVariants[j - 1].varCulture,
                        relatedVariants[j].varCulture,
                        NH_MAX_CULTURE_CODE_LEN);
            }
            numRelatedVariants--;    // we now have one less variant
            rc = NH_SUCCESS;
            break;
        }
    }

    return rc;
}



// ' File: NHTAQTable . hpp 
// 

// Description: 
// 

// Interface to the NHTAQTable class. 

// 

// 

// History: 
// 

// 5/7/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 

// 

// 

// The TAQTable is organized by name and culture. That is the unique key
// in the table. We do lookups by hashing the name, but must consider the
// culture code as we walk the hash table bucket.

. # i f nde f NHTAQTABLE_H PP 

#define NHTAQTABLE_HPP 



#include n NH_culture__codes . h" 
Hnclude "NHNameData . hpp" 
#include "NH_get_er'ror_text . h" 



// how many characters can a TAQ value be? 
#define NH MAX TAQ LEN 20 



// define the possible values for the TAQ action 
// NOTE(review): the two #define lines below are damaged in this listing --
// the macro names contain stray spaces, one directive has lost its '#', and
// the character values ('D', 'X') have drifted away from the names they
// belong to. The constructor comment in NHTAQTable.cpp lists the actions as
// D - delete, R - disregard, X - not applicable, so the value of
// NH_TAQ_ACTION_DISREGARD cannot be recovered from this text alone. Restore
// these definitions from the original NHTAQTable.hpp before compiling.

#define NH_T AQ_ACT I ON_DELET E 

tdefine NH TAQ ACTION DISREGARD 'D' 



'X' 



// 



taqType; 

seplf Con joined ; 

gnAction; 

snAction; 



// 
// 



// define a record in the hash table of TAQ values 
typedef struct NH_TAQ_RECORD_T { 

char taqString [NH_MAX_TAQ_LEN + 1] ; 
TAQ value 
char 
char 
char 
found in gn 

char 
found in sn 
char 

1] ; 



string that is the 



P, S, I, T or Q 
Y or N 

// what to do when 



// 



what to do when 



taqCulture [NH_MAX_CULTURE_CODE_LEN + 

// which culture does this apply to? 



struct NH_TAQ_RECORD_T *next; 
record in this hash branch 
} NH TAQRecord; 



// 



pointer to next TAQ 



// Do not change without seeing function NH_TAQhash().
#define NH_MAX_TAQ_HASH_NODES 907

// define a type that is a pointer to a NH_TAQRecord
typedef NH_TAQRecord *NH_TAQRecordPtr;

// define a type that is a table (array) of NH_TAQRecordPtrs,
// one chain head per hash bucket
typedef NH_TAQRecordPtr NH_TAQHashTable[NH_MAX_TAQ_HASH_NODES];

// which contents a new NHTAQTable starts out with
enum NH_TAQ_TABLE_TYPE {
    NH_PRODUCTION_TAQ_TABLE,    // load the generated TAQ data
    NH_EMPTY_TAQ_TABLE          // start with no entries
};



class NHTAQTable 
{ 

public: 

NHTAQTable (NH_TAQ__TABLE_TYPE type) ; 
" -NHTAQTable {) 

// function to return a pointer to the TAQ . structure for 

the" 

// supplied character string { segment ),- cultureCode 

combination. 

// Returns NULL if the supplied segment is not known to 
the TAQ table 

// for the specified culture code. 

NH_TAQRecordPtr getTAQSegment ( char *nameSeg, 

char *cultureCode) ; 

// specialized version of the above function that looks 

for the 

// name segment in either of the specified culture codes. 

It makes 

// sure that if the name is found in the 
primaryCultureCode, that one 

// gets returned even if we come upon the 
secondaryCultureCode first. 

NH_TAQRecordPtr getTAQSegment ( char *nameSeg, 

char *pr imaryCul tureCode , 

char * secondaryCultureCode) ; 
NHReturnCode v getStatust) {return 

status ; } 

bool getDirtyO 
{return dirty; } 

void setDirty(boo 
1 aBool) {dirty = aBool; } 

int getNum 
HashBuckets ( ) {return NH_MAX_TAQ_HASH_NODES; ) 

NH_TAQRecordPtr getHashBucketStartNodeAt (int 

hashTablelndex) 

{return taqHashTable [hashTablelndex] ; ) 

NHReturnCode addTAQValue ( char 

*taqValue, char taqType, 

char sepIfConj oined, char 

gnTAQAction, 



char snTAQAction, char * taqCul ture ) ; 

NHReturnCode removeTAQValue (char 

*taqValue, char *cultureCode ) ; 

protected : 

private : 

// Returns an integer in the range [0, 
NH_MAX_TAQ_HASH_NODES ] . • 

inline unsigned int hash (char *string) 

{ 

char *p; 
unsigned int . i; 
unsigned int sum; 

for (p = string, i = 2, sum = 0; *p 1= EOS; p+ + , i += 

sum += i * *p; 
return sum %* NH_MAX_TAQ_HASH_NODES ; 
} /+ hash */ ^ 

NHJTAQHashTable taqHashTable; 

NHReturnCode status; // are we 

bool dirty; // have we changed 



2) 



valid 



); 



#endif 



//
// File: NHTAQTable.cpp
//
// Description:
//
//    Implementation to the NHTAQTable class.
//
// History:
//
//    5/14/97     EFB     Created
//    9/9/97      EFB     Added support for culture
//    3/20/98     EFB     Changed names to NH from SN
//



# include <string . h> 
#include <stdio . h> 



#include 
#include 



"NHTAQTable. hpp" 
11 NH util.hpp" 



NHTAQTable : : NHTAQTable (NH_TAQ_TABLE_TYPE type) 
{ 

status « NH_SUCCESS; 

// clear out the hash table 

for (int i = 0; i < NH_MAX_TAQ_HASH_NODES ; i++) 
taqHashTable [i] = NULL; 

// make sure we are not supposed to be doing an empty table, 
if (type == NH_PRODUCTION_TAQ_TABLE) { 

// parameters are: 

// 

// 

// 

// 

// 

disregard, X - not applicable) 
// 

disregard, X - not applicable) 



TAQ string 

taq Type (T, P, S, Q, I) , 

sepIfConjoined ( ' Y 1 or 1 N 1 ) 

Given name action (D - delete, R 



Surname action 



(D - delete, R - 



tool. 



II Culture (2 char code) 

// include the data that was generated via the TAQmanager 
iinclude "taqdata . h" 

// This stuff is just left over from testing 



addTAQValue ("DR", 'T', 1 N 1 , NH_TAQ_ACTION_DELETE, 
NH_TAQ_ACTION_DELETE, NH_CULTURE_CODE_GENERIC ) ; 

addTAQValue ("MR", ' T 1 , ' N 1 , NH_TAQ_ACTION_DELETE, 
NH_TAQ_ACTION_DELETE, NH_CULTURE_CODE_GENERIC ) ; 

addTAQValue ("MRS", 'T', 1 N 1 , NH_TAQ__ACTION_DELETE, 
NH_TAQ_ACTION_DELETE, NH_CULTURE_CODE_GENERIC ) ; 

addTAQValue ("JR", 'Q' , 1 N 1 , NH_TAQ_ACTION_DIS REGARD, 
NH_T AQ_ACT ION_D I S REGARD , NH_CULTURE_CODE_GENERIC ) ; 

addTAQValue ("SR", , Q\ 'N f , NH TAQ ACTION DISREGARD, 



NH T AQ_ACT I ON^D I S REGARD , NH_CULTURE_CODE_GENERIC) ; 

addTAQValue ( "ABDUL" , ' T', ' N ' , NH_TAQ_ACT ION_DI SREGARD , 
NH TAQ_ACTION_DISREGARD, NH_CULTURE_CODE_AP.ABIC) ; 

addTAQValue ("HOMEY", 'T' , ' N ' , NH_TAQ_ACT ION_DI SREGARD , 
NH TAQ__ACT ION_D I S REGARD , NH_CULTURE_CODE_ANGLO) ; 

addTAQValue ("CHINTAQ", 'T\ ' N ' , NH_T AQ_ACT I ON_D I S REGARD , 
NH TAQ ACT ION_D I S REGARD , NH_CULTURE_CODE_CHINESE ) ; 

addTAQValue (" HI SPTAQ", 'T', 1 N ' , NH.__TAQ_ACTI ON_DI S REGARD , 
NH TAQ ACT ION_D I S REGARD , NH_CULTURE_CODE_H I S PANIC ) ; 

addTAQValue ( " KORTAQ" , ' T ' , 1 N 1 , N H_T AQ_AC TION_DISRE GAR D , 
NH TAQ ACTIONJDISREGARD, NH_CULTURE_CODE_KOREAN ) ; 

addTAQValue ( "RUSTAQ" , 'T\ ' N 1 , N H_T AQ_AC T I ON_D I S REGARD , 
NH T AQ_ACT I ON_DI S REGARD , NH__CULTURE_CODE_RUSSIAN) ; 

*/ 
} 

// mark that the table has not been changed. Usefull for 
TAQManager application 
dirty = false; 

} 



// release all the memory used to store the NH_TAQRecords
NHTAQTable::~NHTAQTable()
{
    NH_TAQRecord *prevTAQRecord;
    NH_TAQRecord *taqRecord;
    int tableIndex;

    for (tableIndex = 0; tableIndex < NH_MAX_TAQ_HASH_NODES;
         tableIndex++) {
        taqRecord = taqHashTable[tableIndex];
        while (taqRecord != NULL) {
            // step past the record before deleting it, so we never read
            // ->next from freed memory
            prevTAQRecord = taqRecord;
            taqRecord = taqRecord->next;
            delete prevTAQRecord;
        }
    }
}

// function to take the values passed in, create a NH_TAQRecord
// structure, and add the new structure to this object's
// taqHashTable.
//
// Returns:
//      NH_SUCCESS                - the record was added
//      NH_INVALID_CULTURE_CODE   - the culture code does not validate
//      NH_TAQ_ALREADY_EXISTS     - the value is already defined for this culture
//      NH_TAQ_RECORD_ALLOC_ERROR - the record could not be allocated (the
//                                  table status is set to the same code)
NHReturnCode NHTAQTable::addTAQValue(char *taqValue, char taqType,
                                     char sepIfConjoined,
                                     char gnTAQAction, char snTAQAction,
                                     char *taqCulture)
{
    NHReturnCode rc = NH_SUCCESS;
    NH_TAQRecord *newTAQRecord;
    int tableIndex;
    NH_TAQRecord *prevTAQRecord;

    // first, make sure we know the culture code
    if (NH_validate_culture_code(taqCulture)) {
        // find the hash value for the taq
        tableIndex = hash(taqValue);

        // now see if the taq is already defined for this culture code
        // At the same time, find our insertion point, which will be either:
        //      - the first node in the bucket, if this bucket is empty
        //      - the end of the bucket
        prevTAQRecord = taqHashTable[tableIndex];
        if (prevTAQRecord != NULL) {
            rc = NH_TAQ_ALREADY_EXISTS;     // assume it exists
            // keep walking while this record differs in value or in culture
            while (strcmp(prevTAQRecord->taqString, taqValue) ||
                   strcmp(prevTAQRecord->taqCulture, taqCulture)) {
                if (prevTAQRecord->next == NULL) {
                    rc = NH_SUCCESS;        // does not exist, so looks good so far
                    break;                  // end of bucket chain
                }
                prevTAQRecord = prevTAQRecord->next;    // move through bucket chain
            }
        }

        // if all is still ok (e.g. no duplicate)
        if (rc == NH_SUCCESS) {
            // now create the new record and set its values
            newTAQRecord = new NH_TAQRecord;
            if (newTAQRecord != NULL) {
                NH_safe_strcpy(newTAQRecord->taqString,
                               taqValue, NH_MAX_TAQ_LEN);
                newTAQRecord->taqType = taqType;
                newTAQRecord->sepIfConjoined = sepIfConjoined;
                newTAQRecord->gnAction = gnTAQAction;
                newTAQRecord->snAction = snTAQAction;
                NH_safe_strcpy(newTAQRecord->taqCulture,
                               taqCulture, NH_MAX_CULTURE_CODE_LEN);
                newTAQRecord->next = NULL;

                // now add the new record to the chain of entries (or the
                // start of the bucket). We have already hashed the
                // tableIndex value above, and have
                // found the correct insertion point
                if (prevTAQRecord == NULL)
                    taqHashTable[tableIndex] = newTAQRecord;
                else
                    prevTAQRecord->next = newTAQRecord;
            }
            else {
                rc = NH_TAQ_RECORD_ALLOC_ERROR;
                status = NH_TAQ_RECORD_ALLOC_ERROR;
            }
        }
    }
    else {
        // flag it as an error, but do not mark the entire table as bad
        rc = NH_INVALID_CULTURE_CODE;
    }

    return rc;
}



// Look up the TAQ record for a name segment within a single culture.
//
//   nameSeg     - the (possible) TAQ text; it is hashed to select the bucket
//   cultureCode - culture code the record must also carry
//
// Returns the first record in the bucket chain whose taqString and
// taqCulture both compare equal to the arguments, or NULL when there is no
// such record (or when either argument is NULL).
NH_TAQRecordPtr NHTAQTable::getTAQSegment(char *nameSeg, char *cultureCode)
{
    // strcmp() on a NULL pointer is undefined, so treat NULL as "no match"
    if (nameSeg == NULL || cultureCode == NULL)
        return NULL;

    // walk the chain hanging off the bucket this segment hashes to
    NH_TAQRecordPtr candidate = taqHashTable[hash(nameSeg)];
    while (candidate != NULL) {
        if (strcmp(candidate->taqString, nameSeg) == 0 &&
            strcmp(candidate->taqCulture, cultureCode) == 0)
            return candidate;
        candidate = candidate->next;
    }

    return NULL;
}



// Specialized version of getTAQSegment() that accepts the name segment in
// either of two culture codes.  A record in primaryCultureCode always wins,
// even when a record in secondaryCultureCode appears earlier in the chain.
//
//   nameSeg              - the (possible) TAQ text; hashed to pick the bucket
//   primaryCultureCode   - preferred culture code
//   secondaryCultureCode - fallback culture code
//
// Returns the first primary-culture match if there is one, otherwise the
// first secondary-culture match, otherwise NULL.  A NULL culture code is
// simply never matched; a NULL nameSeg yields NULL.
NH_TAQRecordPtr NHTAQTable::getTAQSegment(char *nameSeg,
                                          char *primaryCultureCode,
                                          char *secondaryCultureCode)
{
    if (nameSeg == NULL)
        return NULL;

    NH_TAQRecordPtr fallback = NULL;   // first secondary-culture match seen

    // A single pass over the bucket gives the same answer as scanning it
    // once per culture: return at once on a primary match, and remember
    // only the first secondary match in case no primary match turns up.
    for (NH_TAQRecordPtr rec = taqHashTable[hash(nameSeg)];
         rec != NULL;
         rec = rec->next) {

        if (strcmp(rec->taqString, nameSeg) != 0)
            continue;

        if (primaryCultureCode != NULL &&
            strcmp(rec->taqCulture, primaryCultureCode) == 0)
            return rec;

        if (fallback == NULL && secondaryCultureCode != NULL &&
            strcmp(rec->taqCulture, secondaryCultureCode) == 0)
            fallback = rec;
    }

    return fallback;
}

// Try to remove the TAQ value specified for the given culture.
//
//   taqValue    - TAQ text of the record to remove; hashed to pick the bucket
//   cultureCode - culture code of the record to remove
//
// Returns NH_SUCCESS if a matching record was found; the record is unlinked
// from its bucket chain and deleted.  Returns NH_TAQ_NOT_FOUND if no record
// matches (or if either argument is NULL).
NHReturnCode NHTAQTable::removeTAQValue(char *taqValue, char *cultureCode)
{
    if (taqValue == NULL || cultureCode == NULL)
        return NH_TAQ_NOT_FOUND;

    int tableIndex = hash(taqValue);
    NH_TAQRecordPtr prevTAQRecordPtr = NULL;
    NH_TAQRecordPtr tempTAQRecordPtr = taqHashTable[tableIndex];

    // advance until we are on the matching record or fall off the chain,
    // keeping track of the record just before the current one
    while (tempTAQRecordPtr != NULL &&
           (strcmp(tempTAQRecordPtr->taqString, taqValue) != 0 ||
            strcmp(tempTAQRecordPtr->taqCulture, cultureCode) != 0)) {
        prevTAQRecordPtr = tempTAQRecordPtr;
        tempTAQRecordPtr = tempTAQRecordPtr->next;
    }

    if (tempTAQRecordPtr == NULL)
        return NH_TAQ_NOT_FOUND;

    if (prevTAQRecordPtr == NULL) {
        // the record was first in the chain, so the hash table entry
        // itself must be redirected
        taqHashTable[tableIndex] = tempTAQRecordPtr->next;
    }
    else {
        // not the first in the chain: splice it out of the list
        prevTAQRecordPtr->next = tempTAQRecordPtr->next;
    }

    delete tempTAQRecordPtr;
    return NH_SUCCESS;
}



//
// File: NHQueryNameData.cpp
//
// Description:
//
//   Implementation to the NHQueryNameData class.
//
// History:
//
//   5/14/97   EFB   Created
//   3/20/98   EFB   Changed names to NH from SN
//



#include <string . h> 
#include <stdio . h> 



#include "NHQueryNameData . hpp" 

#include "NHVariantTable . hpp" 

iinclude "NHResultsList . hpp" 

#include "NH_util . hpp" 

#include "NHDigraphBitmapArray . hpp" 

#include "NHNameParms . hpp" 



extern NHDigraphBitmapArray globalDigraphBitmapArray ; 

// Minimum ratio of matching bits to query-key bits for a key comparison
// to count as a pass (used by NHQueryNameData::compareKey).
#define NH_INDEX_THRESH 0.5



// Construct a query name from separate given name and surname strings.
// The base class is handed the parameters and both name strings; this
// class then clears its own pointers and resolves the variant object for
// each name segment.
NHQueryNameData::NHQueryNameData(NHNameParms *nParms, char *aGn, char *aSn) :
    NHNameData(nParms, aGn, aSn)
{
    // nothing is allocated until prepareKeys() is called
    resultsList = NULL;
    keysArray = NULL;
    numBitsInGnKeys = NULL;
    numBitsInSnKeys = NULL;

    processVariantValues(nParms->gnVariantTable, nParms->snVariantTable);
}



// Construct a query name from given name, surname and middle name strings.
// The base class is handed the parameters and all three name strings; this
// class then clears its own pointers and resolves the variant object for
// each name segment.
NHQueryNameData::NHQueryNameData(NHNameParms *nParms, char *aGn, char *aSn,
                                 char *aMn) :
    NHNameData(nParms, aGn, aSn, aMn)
{
    // nothing is allocated until prepareKeys() is called
    resultsList = NULL;
    keysArray = NULL;
    numBitsInGnKeys = NULL;
    numBitsInSnKeys = NULL;

    processVariantValues(nParms->gnVariantTable, nParms->snVariantTable);
}



// Construct a query name from a single name string plus a format value.
// The base class is handed the parameters, the string and the format;
// this class then clears its own pointers and resolves the variant object
// for each name segment.
NHQueryNameData::NHQueryNameData(NHNameParms *nParms, char *name,
                                 NHNameFormat nameFormat) :
    NHNameData(nParms, name, nameFormat)
{
    // nothing is allocated until prepareKeys() is called
    resultsList = NULL;
    keysArray = NULL;
    numBitsInGnKeys = NULL;
    numBitsInSnKeys = NULL;

    processVariantValues(nParms->gnVariantTable, nParms->snVariantTable);
}



// Release the key array and the per-key bit-count arrays allocated by
// prepareKeys().  resultsList is not released here.
NHQueryNameData::~NHQueryNameData()
{
    // delete [] on a NULL pointer is a no-op, so no guards are required
    delete [] keysArray;
    delete [] numBitsInGnKeys;
    delete [] numBitsInSnKeys;
}



// Function to get a pointer to a NHVariant object for each name 
// segment. We do this here, in the query 

// name, so that lookups only have to be done once for the query 
name . 

// Note also that we check first to make sure that we are supposed to 
be 

// using variants (we do this per name field). 

void NHQueryNameData : : processVariantValues (NHVariantTable 

*gnVariantTable, 

NHVariantTab 

le *snVariantTable) 
{ 

int i; 



if (nameParms->getUseGnVariants ( ) ) { 

for (i = 0; i < numGnSegments ; i++) 

gnSegmentVariants [i] = gnVariantTable- 
>getVariantObjectForName (gnSegments [i] . segString) ; 
} 

if (nameParms->getUseSnVariants ( ) ) { 

for (i = 0; i < numSnSegments ; i++) 

snSegmentVariants [i] = snVariantTable- 
>getVariantObjectForName (snSegments [i] . segString) ; 
} 

} 



// function to allocate space for, and generate, the keys for 

// this query name. The caller calls this explicitly with the 

// desired key widths for the GN and SN. We use these 

// values in conjunction with the numGnSegments and numSnSegments 



// to calculate how big to make the array that will hold the keys, 
void NHQueryNameData: : prepareKeys (NHKeyWidth gnKeyWidth, 

NHKeyWidth snKeyWidth) 

* i n t keyArraySize; 

unsigned char 1 argerNumberOf Segments ; 

i nt fullKeyLen; 



// first allocate the keys 

if (numSnSegments > numGnSegments ) 

largerNumberOf Segments = numSnSegments; 

else 

largerNumberOf Segments = numGnSegments; 
if (gnKeyWidth == NH_KEY_WIDTH_32 ) { 

if (snKeyWidth == NH_KEY_WIDTH_32) 
fullKeyLen = 64; 

else 

fullKeyLen = 96; 

} 

else { 

if (snKeyWidth == NH_KEY_WIDTH_32 ) 
fullKeyLen = 96; 

else 

fullKeyLen = 128; 

keyArraySize = largerNumberOf Segments * fullKeyLen; 
keysArray = new unsigned int [ keyArraySize] ; 

// save the key lengths 
queryGnKeyWidth = gnKeyWidth; 
querySnKeyWidth = snKeyWidth; 

// now generate the keys for the query 

numBitmapKeys = genlndexKeys (largerNumberOf Segments , gnKeyWidth, 

snKeyWidth, keysArray) ; 

// now allocate space for the arrays that hold the number of 
// bits turned on for each key in the GN and SN. 
numBitsInGnKeys = new unsigned char [largerNumberOf Segments ] ; 
numBitsInSnKeys = new unsigned char [ largerNumberOf Segments ] ; 

unsigned char *keysArrayBytePtr = (unsigned char *) keysArray; 
for (int i = 0; i < numBitmapKeys; i++) { 
if (gnKeyWidth -= NH_KEY_WIDTH_32 ) { 

// the number of bits turned on is the sum of the 

number of bits 

// in each of the 4 bytes that make up the 32 bit 

value 

numBitsInGnKeys [i] = 
globalDigraphBitmapArray.getNumBitsForByte {* ( keysArrayBytePtr++) ) + 

globalDigraphBitmapArray.getNumBitsForByte (* ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray.getNumBitsForByte {* (keysArrayBytePt 

r++)) + 



globalDigraphBitmapArray.getNumBitsForByte (* (keysArrayBytePt 



r++) ) ; 

} 

else { 

// the number of bits turned on is the sum of the 

number of bits 

// in each of the 8 bytes that make up the 64 bit 

value 

numBitsInGnKeys [i ] = 
globalDigraphBitmapArray . getNumBitsForByte (* ( keysArrayBytePtr++ ) ) + 

globalDigraphBitmapArray . getNumBitsForByte { * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray .getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte { * ( keysArrayBytePt 
r++)) + " 

globalDigraphBitmapArray . getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + - 

globalDigraphBitmapArray . getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte { * ( keysArrayBytePt 

r++)); 

} 

// now do the surname 

if (snKeyWidth == NH_KEY_WIDTH_32) { 

// the number of bits turned on is the sum of the 

number of bits 

// in each of the 4 bytes that make up the 32 bit 

value 

numBitsInSnKeys [i] = 
globalDigraphBitmapArray. getNumBitsForByte (* ( keysArrayBytePt r++ ) ) + 

globalDigraphBitmapArray .getNumBitsForByte ( * {keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte ( * { keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte (* (keysArrayBytePt 

r++) ) ; 

} 

else { 

// the number of bits turned on is the sum of the 

number of bits 

// in each of the 8 bytes that make up the 64 bit 

value 

numBitsInSnKeys [i] = 
globalDigraphBitmapArray. getNumBitsForByte {* ( keysArrayBytePtr++ ) ) + 

globalDigraphBitmapArray . getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + 



globalDigraphBitmapArray. getNumBitsForByte (* (keysArrayBytePt 

r++) ) + 

globalDigraphBitmapArray . getNumBitsForByte (* (keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte ( * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte { * ( keysArrayBytePt 

r++)) + 

globalDigraphBitmapArray . getNumBitsForByte { * ( keysArrayBytePt 
r++)) + , 

globalDigraphBitmapArray. getNumBitsForByte (* { keysArrayBytePt 

r++)); 

} 

} 

} 



#define N H__E I T H E R_N H_0 R_GN 1 

#define NH__BOTH_NH_AND_GN 2 

// function to compare the key(s) for this query name against 

// a supplied key from an eval name. Before this function is 

// called, the caller must have called the 

// perpareKeys ( ) method, which sets the gnKeyLength and 

// snKeyLength variables, and generates the keys for this 

// query name. 

// The comparison is performed by looking at the givename name 

// and surname portions of the key separately. For each of these 

// subkeys, we see how many bits match, a calculate the quotient of 

// matching bits / bits that could have matched. This score is 

// compared to ???. If the score for either the GN or SN comparison 

// is favorable, the function returns true to indicate that the 

// evaluation name associated with the supplied key is a possible 

// match, and should be retrieved for further consideration. 

// Since this object (the query) could generate multiple keys, 

// we may have to perform several comparisons. 

bool NHQueryNameData: : compareKey (unsigned int *evalBitMapKey, unsigned 

char numEvalKeys) 
{ 



bool 




rc = false; 


unsigned 


int 


*evalKeyPtr; 


unsigned 


int 


*queryKeyPtr; 


unsigned 


int 


*masterQueryKeyPtr = keysArray; 


unsigned 


int 


maskedVal ; 


unsigned 


char 


numBitsThatMatched; 


unsigned 


char 


*bytePtr; 


bool 




passedGn = false; 


bool 




passedSn = false; 


int 




indexMode = 



NH_BOTH_NH_AND_GN ; 

// for each of the query's keys, do both a SN and GN comparison 
// out nested loop compares the first GN and SN query key to 
// all the eval keys (inner loop), and then moves on to the 



next 

// query key (outter loop) . 

for (int i = 0; (i < numBit map Keys ) && (-re == false); i + +) { 

evalKeyPtr = evalBitMapKey; // start the 

eval ptr at the beggining 

for (int j = 0; j < (int) numEvalKeys; j++) { 



the 

after we have 



// place the queryKeyPtr back to the beggining of 

// current query key. This value gets advanced 

// compared the current query key to all eval keys 
queryKeyPtr = masterQueryKeyPtr; 



// first, check the given name 

if (queryGnKeyWidth == NH_KEY_WIDTH_32 ) { 

// just compare a 32 bit key for the gn 

maskedVal = *evalKeyPtr & * queryKeyPtr ; 

bytePtr = (unsigned char * ) &maskedVal ; 

numBitsThatMatched = 
globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray . getNumBitsForByte (* (bytePtr++ 

)) + 

globalDigraphBitmapArray . getNumBitsForByte ( * (bytePtr++ 

)) + 

globalDigraphBitmapArray . getNumBitsForByte (* (bytePtr++ 

) ) ; 

if ( (double) numBitsThatMatched / 
(double) numBit sInGnKeys[i] > NH_INDEX_THRESH) { 

if (indexMode == 

NH_EITHER_NH_OR_GN) { 

rc = true; 
break; 

) 

else { 

// looking for both, is SN already set? 

if 

(passedSn) { // yes, so we matched both 

rc = true; 
break; 

} 

else 

// no, just set the gn flag 

passedGn = true; 

> 

} 

evalKeyPtr++; // advance pointers 

queryKeyPtr++; 

} 

else { 

// just compare a 64 bit key for the gn 
maskedVal = *evalKeyPtr & *queryKeyPtr ; 
bytePtr = (unsigned char * ) &maskedVal; 

numBitsThatMatched = 
globalDigraphBitmapArray . getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray . getNumBitsForByte ( * (bytePtr++ 



)■>+." ' 

globalDigraphBitmapArray . getNumBitsForByte { * (bytePtr++ 

)) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

) ) ; 

evalKeyPtr++ ; // advance pointers to get 

to second 32 bits in this 64 bit key 

queryKeyPtr++ ; 
■ maskedVal = *evalKeyPtr & + queryKeyPtr ; 

bytePtr = (unsigned char * ) &maskedVal ; 

numBitsThatMatched += 
globalDigraphBitmapArray .getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray .getNumBitsForByte (* (bytePtr ++ 

' ) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

)) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

)); 

if ( (double) numBitsThatMatched / 
(double) numBitsInGnKeys [i] > NH_INDEX_THRESH) { 

if (indexMode — 

NH_EITHER_NH_OR_GN ) { 

rc = true; 
break; 

} 

else { 

// looking for both, is SN already set? 

if 

(passedSn) { // yes, so we matched both 

rc = true; 



break; 



} 

else 

// no, just set the gn flag 

passedGn = true; 

} 

> 

evalKeyPtr++; // advance pointers 

queryKeyPtr++; 

} 

// now, check the surname 

if (querySnKeyWidth == NH_KEY_WIDTH_32 ) { 

// just compare a 32 bit key for the sn 

maskedVal = *evalKeyPtr & *queryKeyPtr ; 

bytePtr = (unsigned char + ) SmaskedVal ; 

numBitsThatMatched = 
globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

) ) + 

globalDigraphBitmapArray . getNumBitsForByte ( * (bytePtr++ 

) ) + 



globalDigraphBitmapArray . getNumBitsForByte ( * (bytePtr++ 

) ) ; 

if ( (double) numBitsThatMatched / 
(double) numBitsInSnKeys [i] > NH INDEX_THRESH) { 

if (indexMode == 

NH_EITHER_NH_0RJ3N) { 

rc = true; 
break; 

} 

else { 

// looking for both, is GN already set? 

if 

(passedGn) { // yes, so we matched both 

rc = true; 
break; 

} 

else 

// no, just set the sn flag 

passedSn = true; 

} 

} 

evalKeyPtr++; // advance pointers 

queryKeyPtr++ ; 

} 

else { 

// just compare a 64 bit key for the sn 
maskedVal = *evalKeyPtr & *queryKeyPt r ; 
bytePtr = (unsigned char * ) &maskedVal ; 

numBitsThatMatched = 
globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

)) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

)) + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

) ) ; 

evalKeyPtr++; // advance pointers to get 

to second 32 bits in this . 64 bit key 

queryKeyPtr++; 

maskedVal = *evalKeyPtr & *queryKeyPtr ; 
bytePtr = (unsigned char * ) SmaskedVal ; 

numBitsThatMatched += 
globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++) ) + 

globalDigraphBitmapArray. getNumBitsForByte ( + (bytePtr++ 

) ) + 

globalDigraphBitmapArray .getNumBitsForByte ( + (bytePtr++ 

)} + 

globalDigraphBitmapArray. getNumBitsForByte (* (bytePtr++ 

) ) ; 

if ( (double) numBitsThatMatched / 
(double) numBitsInSnKeys [i] > NH_INDEX_THRESH) { 



if (indexMode == 

NH_EITHER_NH_OR_GN) " { 

rc = true; 
break; 

} 

else { 

// looking for both, is GN already set? 

if 

(passedGn) { // yes, so we- matched both 

rc = true; 
break; 

} 

else 

// no, just set the sn flag 

passedSn = true; 

} 

} 

evalKeyPtr++; // advance pointers 

queryKeyPtr++; 

} 

} 

// place the master query pointer (for the outer, query 
loop) at the next 

// query key. We will be. advancing the pointer somewhere 

between 1 and 4 

// positions (each position is 4 bytes) . 
if (queryGnKeyWidth == NH_KEY_WIDTH_32 ) 
masterQueryKeyPtr++; 

else 

masterQueryKeyPtr += 2; 
if (querySnKeyWidth NH_KEY_WIDTH_32 ) 
masterQueryKeyPtr++; 

else 

masterQueryKeyPtr 2; 



return rc; 

) 



// implementation of SNParmsType helper functions 



ttinclude <stdlib . h> 
# include <string . h> 

#include "NHParmsType . h M 



int NH_getParmsTypeIndex (NHParmsType aParmsType) ; 



// Returns true when aParmsType is a parms type that
// NH_getParmsTypeIndex() can map to an index, false otherwise.
bool NH_validate_parms_type(NHParmsType aParmsType)
{
    return NH_getParmsTypeIndex(aParmsType) != -1;
}

char * NH_get_culture_string_f or_parm_type {NHParmsType aParmsType) 

{ 

int index = NH_getParmsTypeIndex (aParmsType ) ; 

char *rc; 

if (index != -1) 

rc = NH_culture_strings [index] ; 

else 

rc = NULL; 
return rc; 



bool get culture_code_for_parms_type (NHParmsType aParmsType, 

NHCultureCode cultureCode) 

{ 

bool rc; 

int index = NH_getParmsTypeIndex (aParmsType ) ; 

if (index != -1) { 

strncpy (cultureCode, NH_culture_codes [ index] , 
NH_MAX_CULTQRE_CODE_LEN) ; 

cultureCode [NH_MAX_CULTURE_CODE_LEN] = 1 \0 1 ; 
rc = true; 

} 

else 

rc = false; 
return rc; 



// function to get the ordinal position of the 

// parms type. We use this to then index into the 

// NH_culture_codes and NH_culture_strings arrays. 

// For this to work, we must make sure that the relative 

// order of these enums and arrays stays constant. 

int NH_getParmsType Index (NHParmsType aParmsType) 

{ 

int rc = aParmsType; 



if ((rc < 0) M (rc >= NH_NUM_PARMS_TYPES) )• 
rc = -1; 

return rc; 



// File: NHResultsList . cpp 
// . 

// Description: 
// 

// Implementation to the NHResultsList class. 

// 

// 

// History: 
// 

// 6/10/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 

// how big should the results list array start out as if 

// they want one that is expandable 
#define NH_DEFAULT_RESULTS_ARRAY_SIZE 2



# include <string . h> 

#include <stdio.h> 

#include <stdlib.h> 

#include <ctype . h> 

#include "NHResultsList . hpp" 
#include "NHEvalNameData . hpp" 
#include "NH util.hpp" 



// qsort() comparison callback for the hit array; defined later in this file
static int NH_result_sort_function(const void *arg1, const void *arg2);



// Create a results list.
//
//   maxHits - a positive value asks for a list that returns at most that
//             many hits; NH_RESULTS_LIST_SIZE_EXPANDABLE asks for a list
//             that grows as hits are added.
//
// On return status is NH_SUCCESS, NH_RESULTS_LIST_ALLOCATION_ERROR (the
// hit array could not be allocated; hitArray is NULL), or
// NH_INVALID_RESULTS_LIST_SIZE (any other maxHits; no array is allocated).
NHResultsList::NHResultsList(int maxHits)
{
    hitArray = NULL;
    isSorted = true;
    numHitsInArray = 0;
    status = NH_SUCCESS;

    if (maxHits > 0) {
        // a fixed-size list is given room for twice the requested hits
        hitArraySize = 2 * maxHits;
        maxHitsToReturn = maxHits;
    }
    else if (maxHits == NH_RESULTS_LIST_SIZE_EXPANDABLE) {
        // they want an expandable list
        hitArraySize = NH_DEFAULT_RESULTS_ARRAY_SIZE;
        maxHitsToReturn = NH_RESULTS_LIST_SIZE_EXPANDABLE;
    }
    else {
        status = NH_INVALID_RESULTS_LIST_SIZE;
        hitArraySize = 0;
        maxHitsToReturn = 0;
        return;
    }

    hitArray = (NHEvalNameData **) malloc(hitArraySize *
                                          sizeof(NHEvalNameData *));
    if (hitArray == NULL)
        status = NH_RESULTS_LIST_ALLOCATION_ERROR;
}



// Delete every hit still held in the array, then release the array.
NHResultsList::~NHResultsList()
{
    // the list holds its own copies of the hits (see addHit)
    for (int i = 0; i < numHitsInArray; i++)
        delete hitArray[i];

    // the array came from malloc()/realloc(), so free() it; free(NULL) is
    // a no-op
    free(hitArray);
}

// Return the hit at position anIndex of the sorted list, or NULL when
// anIndex is outside [0, numHitsInArray).  The list is sorted first if
// hits were added since the last sort.  The returned object still belongs
// to the list.
NHEvalNameData * NHResultsList::getHitAt(int anIndex)
{
    // first make sure the list is sorted
    if (isSorted == false)
        sortHits();

    // now make sure the requested index is valid
    if ((anIndex < 0) || (anIndex >= numHitsInArray))
        return NULL;

    return hitArray[anIndex];
}



// Add a hit to the results list.  We make a copy of the hit using the
// compiler-generated copy constructor.  This should work ok, since we do
// not do any dynamic allocation in the class or its subclass.  If this
// ever changes, we will need to create a copy constructor for the
// NHEvalNameData and NHNameData classes.
//
//   aHit - the hit to copy into the list; the caller keeps ownership of it
//
// Returns NH_SUCCESS, NH_RESULTS_ARRAY_NULL_ERROR when the list has no hit
// array, or NH_RESULTS_LIST_INSERT_ALLOC_FAILURE when an expandable list
// could not be grown.  No copy is made (and so none is leaked) when an
// error is returned.
NHReturnCode NHResultsList::addHit(NHEvalNameData *aHit)
{
    // nothing can be stored if the constructor did not get an array
    if (hitArray == NULL)
        return NH_RESULTS_ARRAY_NULL_ERROR;

    const bool expandable =
        (maxHitsToReturn == NH_RESULTS_LIST_SIZE_EXPANDABLE);

    // if we are supposed to expand, double the array before it fills up
    if (expandable && (numHitsInArray + 1 == hitArraySize)) {
        NHEvalNameData **newHitArray =
            (NHEvalNameData **) realloc(hitArray,
                                        hitArraySize * 2 *
                                        sizeof(NHEvalNameData *));
        if (newHitArray == NULL)
            return NH_RESULTS_LIST_INSERT_ALLOC_FAILURE; // old array intact

        hitArray = newHitArray;
        hitArraySize *= 2;
    }

    // only now that there is certainly a slot do we copy the hit
    hitArray[numHitsInArray] = new NHEvalNameData(*aHit);
    numHitsInArray++;
    isSorted = false;

    // a fixed-size list that has just filled up is sorted, which also
    // trims it back down to maxHitsToReturn entries
    if (!expandable && (numHitsInArray >= hitArraySize))
        sortHits();

    return NH_SUCCESS;
}



// sort the hits, and make sure there are no more than 
// ' rnaxHitsToReturn items in* the array. Any excess items 
// should be deleted, and the numHitsInArray variable 
// set to be equal to rnaxHitsToReturn 
void NHResultsList :: sortHits ( ) 
I 

// first, make sure we have something. to sort 
if (numHitsInArray > 1) { 
// sort the hits 

qsort (hitArray, numHitsInArray, sizeof (NHEvalNameData * ) , 

NH_result_sort_f unction) ; 
// now, if we have more hits than they wanted, chop some 

off 

// but only chop if we are not expandable 
if (rnaxHitsToReturn != 
NH_RESULTS_LIST_SIZE_EXPANDABLE) { 

if (numHitsInArray > rnaxHitsToReturn) { 
for (int i = rnaxHitsToReturn; i < 

numHitsInArray; i++) 

delete hitArray [i] ; 
// reflect the new number of hits in the 

array 

numHitsInArray = rnaxHitsToReturn; 

} 

> 

} 

isSorted = true; 

} 



// return the number of hits. We need to make sure that if 
// we are using a fixed size, we do not return a value greater 
// than the number they requested, 
int NHResultsList :: getNumHits (void) 
{ 

if (rnaxHitsToReturn ~ NH_RESULTS_LIST_SIZE_EXPANDABLE) 
return numHitsInArray; 

else 

return numHitsInArray < rnaxHitsToReturn ? 



} 



numHitsInArray : maxHitsToReturn 



// compare function for the results list. Here, we cast the 
// arguments to pointers to NHEvalNameData objects, and compare 
// their scores to see who is larger. 

int NH result sort__f unction ( const void *argl, const void *arg2 ) 
{ 

NHEvalNameData *iteml = * (NHEvalNameData **) argl; 
NHEvalNameData *item2 = * (NHEvalNameData **) arg2; 

return iteml->compareScore ( item2 ) ; 

} 



// File: NHNameParms.cpp
//
// Description:
//
//   Implementation to the NHNameParms class.
//
// History:
//
//   2/27/98   EFB   Created to separate pre-processing parameters from
//                   comparison parameters.
//   3/20/98   EFB   Changed names to NH from SN
//



#include <string.h> 
tinclude <stdio.h> 
#include <stdlib.h> 



#include "NHNameParms . hpp" 

tinclude "NHVariantTable . hpp" 

#include "NHTAQTable . hpp" 

#include "NH_variant_taq_globals . h" 

#include "NHParmsType . h" 



// Build a parameter set for the given parms type.
//
//   aParmsType             - selects the primary culture code; the generic
//                            culture is always used as the secondary
//   gnVariants, snVariants - use variants for the given name / surname
//   gnTaqs, snTaqs         - separate TAQs in the given name / surname
//   gnUnknowns, snUnknowns - check unknowns in the given name / surname
//   segBreakCharacters     - segment break characters; NULL selects
//                            NH_DEFAULT_SEG_DELIM_CHARS
//   noiseCharacters        - noise characters; NULL selects
//                            NH_DEFAULT_NOISE_CHARS
//
// The outcome is left in status: NH_SUCCESS, NH_INVALID_PARMS_TYPE, one of
// the *_CREATION_ERROR codes, or the status reported by a table.  Set-up
// stops at the first step that fails, so later members stay NULL.
NHNameParms::NHNameParms(NHParmsType aParmsType,
                         bool gnVariants, bool snVariants,
                         bool gnTaqs, bool snTaqs,
                         bool gnUnknowns, bool snUnknowns,
                         const char *segBreakCharacters,
                         const char *noiseCharacters)
{
    // assume success
    status = NH_SUCCESS;

    // set these to NULL until we get them
    gnVariantTable = NULL;
    snVariantTable = NULL;
    taqTable = NULL;
    segmentBreakChars = NULL;
    noiseChars = NULL;

    parmsType = aParmsType;

    // copy the caller's specifications up front, so that the flags hold
    // defined values even when the parms type turns out to be invalid
    useGnVariants = gnVariants;
    useSnVariants = snVariants;
    checkGnUnknowns = gnUnknowns;
    checkSnUnknowns = snUnknowns;
    separateGnTaqs = gnTaqs;
    separateSnTaqs = snTaqs;

    // set up the culture codes with the one they specified as the
    // primary, and generic as the secondary.
    // This also makes sure the specified culture is valid
    if ((get_culture_code_for_parms_type(aParmsType,
                                         primaryCultureCode) == false) ||
        (get_culture_code_for_parms_type(NH_PARMS_GENERIC,
                                         secondaryCultureCode) == false)) {
        status = NH_INVALID_PARMS_TYPE;
        return;
    }

    // given name variant table
    gnVariantTable = NH_getVariantTable(NH_GIVENNAME_VARIANTS);
    if (gnVariantTable == NULL)
        status = NH_GN_VAR_TABLE_CREATION_ERROR;
    else
        status = gnVariantTable->getStatus();
    if (status != NH_SUCCESS)
        return;

    // surname variant table
    snVariantTable = NH_getVariantTable(NH_SURNAME_VARIANTS);
    if (snVariantTable == NULL)
        status = NH_NH_VAR_TABLE_CREATION_ERROR;
    else
        status = snVariantTable->getStatus();
    if (status != NH_SUCCESS)
        return;

    // TAQ table
    taqTable = NH_getTAQTable();
    if (taqTable == NULL)
        status = NH_TAQ_TABLE_CREATION_ERROR;
    else
        status = taqTable->getStatus();
    if (status != NH_SUCCESS)
        return;

    // segment break characters; provide a default if they specified NULL
    if (segBreakCharacters == NULL)
        segmentBreakChars = strdup(NH_DEFAULT_SEG_DELIM_CHARS);
    else
        segmentBreakChars = strdup(segBreakCharacters);
    if (segmentBreakChars == NULL) {
        status = NH_SEG_BREAK_CHARS_CREATION_ERROR;
        return;
    }

    // noise characters; provide a default if they specified NULL
    if (noiseCharacters == NULL)
        noiseChars = strdup(NH_DEFAULT_NOISE_CHARS);
    else
        noiseChars = strdup(noiseCharacters);
    if (noiseChars == NULL)
        status = NH_NOISE_CHARS_CREATION_ERROR;
}

// Constructor to read from a file stream, i.e. the inverse of
// archiveData().  The stream is expected to hold, in order: parmsType, the
// six bool flags, the two culture codes (NH_MAX_CULTURE_CODE_LEN + 1 bytes
// each), then the segment break characters and the noise characters, each
// stored as an int length followed by that many characters plus a '\0'.
//
// The outcome is left in status:
//   NH_SUCCESS
//   NH_NAME_PARMS_BAD_STREAM_ON_CONSTRUCT     stream unusable, or the fixed
//                                             part could not be read
//   NH_NAME_PARMS_FILE_BREAKS_CHARS_ERROR     bad break characters entry
//   NH_NAME_PARMS_FILE_NOISE_CHARS_ERROR      bad noise characters entry
//   NH_NAME_PARMS_FILE_BAD_CULTURE_CODE       unknown culture code
//   (or the error returned by setSegmentBreakChars() / setNoiseChars())
NHNameParms::NHNameParms(istream &inStream)
{
    const int maxStringLen = 200;       // longest string we accept
    char tempString[maxStringLen + 1];
    int stringLen = 0;

    status = NH_SUCCESS;                // assume success

    segmentBreakChars = NULL;
    noiseChars = NULL;

    // Nothing in the stream supplies these, so give them a defined value
    // instead of leaving the pointers indeterminate.
    // NOTE(review): presumably the tables should be attached here the way
    // the other constructor does it - confirm with the callers.
    gnVariantTable = NULL;
    snVariantTable = NULL;
    taqTable = NULL;

    // defined values for the flags and codes in case the read fails
    useGnVariants = useSnVariants = false;
    separateGnTaqs = separateSnTaqs = false;
    checkGnUnknowns = checkSnUnknowns = false;
    primaryCultureCode[0] = '\0';
    secondaryCultureCode[0] = '\0';

    if (!inStream.good()) {
        status = NH_NAME_PARMS_BAD_STREAM_ON_CONSTRUCT;
        return;
    }

    inStream.read((char *) &parmsType, sizeof(NHParmsType));
    inStream.read((char *) &useGnVariants, sizeof(bool));
    inStream.read((char *) &useSnVariants, sizeof(bool));
    inStream.read((char *) &separateGnTaqs, sizeof(bool));
    inStream.read((char *) &separateSnTaqs, sizeof(bool));
    inStream.read((char *) &checkGnUnknowns, sizeof(bool));
    inStream.read((char *) &checkSnUnknowns, sizeof(bool));

    // read the culture strings, plus their NULL terminators
    inStream.read((char *) primaryCultureCode, NH_MAX_CULTURE_CODE_LEN + 1);
    inStream.read((char *) secondaryCultureCode,
                  NH_MAX_CULTURE_CODE_LEN + 1);
    // never trust the file to have terminated them
    primaryCultureCode[NH_MAX_CULTURE_CODE_LEN] = '\0';
    secondaryCultureCode[NH_MAX_CULTURE_CODE_LEN] = '\0';

    if (inStream.fail()) {
        status = NH_NAME_PARMS_BAD_STREAM_ON_CONSTRUCT;
        return;
    }

    // read string as the length, followed by the null terminated string,
    // including the NULL.  Make sure we read a reasonable amount from the
    // file: a negative length is as bad as one that is too large.
    inStream.read((char *) &stringLen, sizeof(int));
    if (inStream.fail() || stringLen < 0 || stringLen > maxStringLen) {
        status = NH_NAME_PARMS_FILE_BREAKS_CHARS_ERROR;
        return;
    }
    inStream.read((char *) tempString, stringLen + 1);
    if (inStream.fail()) {
        status = NH_NAME_PARMS_FILE_BREAKS_CHARS_ERROR;
        return;
    }
    tempString[stringLen] = '\0';
    status = setSegmentBreakChars(tempString);
    if (status != NH_SUCCESS)
        return;

    // the noise characters are stored the same way
    inStream.read((char *) &stringLen, sizeof(int));
    if (inStream.fail() || stringLen < 0 || stringLen > maxStringLen) {
        status = NH_NAME_PARMS_FILE_NOISE_CHARS_ERROR;
        return;
    }
    inStream.read((char *) tempString, stringLen + 1);
    if (inStream.fail()) {
        status = NH_NAME_PARMS_FILE_NOISE_CHARS_ERROR;
        return;
    }
    tempString[stringLen] = '\0';
    status = setNoiseChars(tempString);
    if (status != NH_SUCCESS)
        return;

    // as a last check, make sure the culture codes are valid
    if ((NH_validate_culture_code(primaryCultureCode) == false) ||
        (NH_validate_culture_code(secondaryCultureCode) == false))
        status = NH_NAME_PARMS_FILE_BAD_CULTURE_CODE;
}



// Release the two character-set strings.  Both come from strdup(), hence
// free() rather than delete; free(NULL) is a no-op.  The variant and TAQ
// tables are not released here.
NHNameParms::~NHNameParms()
{
    free(segmentBreakChars);
    free(noiseChars);
}



// Write out the NHNameParms object to a stream so that it can be read in
// at a later time by the stream constructor.  Written in order: parmsType,
// the six bool flags, the two culture codes (NH_MAX_CULTURE_CODE_LEN + 1
// bytes each), then the segment break characters and the noise characters,
// each as an int length followed by the characters and their '\0'.
//
// Returns NH_SUCCESS, or NH_NAME_PARMS_BAD_STREAM_ON_WRITE when the stream
// was unusable or a write failed.  The same value is stored in status.
//
// NOTE(review): the stream constructor rejects strings longer than 200
// characters, but no such limit is applied here.
NHReturnCode NHNameParms::archiveData(ostream &outStream)
{
    NHReturnCode rc = NH_SUCCESS;

    if (outStream.good()) {
        outStream.write((char *) &parmsType, sizeof(NHParmsType));
        outStream.write((char *) &useGnVariants, sizeof(bool));
        outStream.write((char *) &useSnVariants, sizeof(bool));
        outStream.write((char *) &separateGnTaqs, sizeof(bool));
        outStream.write((char *) &separateSnTaqs, sizeof(bool));
        outStream.write((char *) &checkGnUnknowns, sizeof(bool));
        outStream.write((char *) &checkSnUnknowns, sizeof(bool));

        // write the culture strings, plus their NULL terminators.
        outStream.write((char *) primaryCultureCode,
                        NH_MAX_CULTURE_CODE_LEN + 1);
        outStream.write((char *) secondaryCultureCode,
                        NH_MAX_CULTURE_CODE_LEN + 1);

        // Either string may still be NULL if construction failed part way
        // through; strlen(NULL) is undefined, so archive it as "".
        const char *breakChars =
            (segmentBreakChars != NULL) ? segmentBreakChars : "";
        const char *noise = (noiseChars != NULL) ? noiseChars : "";
        int stringLen;

        // write out string as the length, followed by the null terminated
        // string, including the NULL
        stringLen = strlen(breakChars);
        outStream.write((char *) &stringLen, sizeof(int));
        outStream.write((char *) breakChars, stringLen + 1);

        stringLen = strlen(noise);
        outStream.write((char *) &stringLen, sizeof(int));
        outStream.write((char *) noise, stringLen + 1);

        // a write that failed part way must not be reported as success
        if (outStream.fail())
            rc = NH_NAME_PARMS_BAD_STREAM_ON_WRITE;
    }
    else
        rc = NH_NAME_PARMS_BAD_STREAM_ON_WRITE;

    status = rc;
    return rc;
}

// Replace the set of segment break characters with a copy of
// segBreakChars.  If they gave us NULL, we keep a copy of an empty string,
// so we won't have to worry about accessing a NULL later on.
//
// Returns NH_SUCCESS, or NH_SEG_BREAK_CHARS_CREATION_ERROR when the copy
// could not be allocated (the member is then NULL, the old set is gone).
NHReturnCode NHNameParms::setSegmentBreakChars(char *segBreakChars)
{
    // Make the copy BEFORE releasing the old set: the caller may hand us
    // our own current string (e.g. the value of getSegmentBreakChars()),
    // and copying from memory that was already freed is undefined.
    char *replacement = strdup((segBreakChars != NULL) ? segBreakChars : "");

    // now get rid of the old set of characters (free(NULL) is a no-op)
    free(segmentBreakChars);
    segmentBreakChars = replacement;

    if (segmentBreakChars == NULL)
        return NH_SEG_BREAK_CHARS_CREATION_ERROR;

    return NH_SUCCESS;
}

// Replace the set of noise characters with a copy of string.  If they gave
// us NULL, we keep a copy of an empty string, so we won't have to worry
// about accessing a NULL later on.
//
// Returns NH_SUCCESS, or NH_NOISE_CHARS_CREATION_ERROR when the copy could
// not be allocated (the member is then NULL, the old set is gone).
NHReturnCode NHNameParms::setNoiseChars(char *string)
{
    // Make the copy BEFORE releasing the old set: the caller may hand us
    // our own current string (e.g. the value of getNoiseChars()), and
    // copying from memory that was already freed is undefined.
    char *replacement = strdup((string != NULL) ? string : "");

    // now get rid of the old set of characters (free(NULL) is a no-op)
    free(noiseChars);
    noiseChars = replacement;

    if (noiseChars == NULL)
        return NH_NOISE_CHARS_CREATION_ERROR;

    return NH_SUCCESS;
}



//
// File: NHNameData.cpp
//
// Description:
//
//   Implementation to the NHNameData class.
//
// History:
//
//   5/8/97    EFB   Created
//   3/20/98   EFB   Changed names to NH from SN
//



iinclude <string.h> 
iinclude <stdio.h> 
#include <stdlib.h> 
iinclude <ctype.h> 

#include "NHNameData . hpp' r 

iinclude "NHTAQTable . hpp" 

iinclude "NHVariantTable . hpp" 

iinclude "NH_util . hpp" 

iinclude "NHDigraphBitmapArray . hpp" 

iinclude "NHNameParms . hpp" 



extern 



NHDigraphBitmapArray 



globalDigraphBitmapArray ; 



// Construct a name from separate given-name and surname strings.
//
// Parameters:
//    nParms - comparison parameters; the pointer is saved in nameParms and
//             supplies the noise characters, segment-break characters and
//             TAQ table used to process the name.
//    aGn    - given name (NUL-terminated, must not be NULL).
//    aSn    - surname (NUL-terminated, must not be NULL).
//
// Names longer than NH_MAX_GN_LEN / NH_MAX_NH_LEN are truncated to those
// limits before storage is allocated.
NHNameData::NHNameData(NHNameParms *nParms, char *aGn, char *aSn)
{
    nameStorage = NULL;

    int gnLen = strlen(aGn);
    int snLen = strlen(aSn);

    // make sure the strings are not too big
    if (gnLen > NH_MAX_GN_LEN)
        gnLen = NH_MAX_GN_LEN;
    if (snLen > NH_MAX_NH_LEN)
        snLen = NH_MAX_NH_LEN;

    // allocate internal space sized to the (possibly clamped) lengths
    allocateNameStorage(gnLen, snLen);

    // NOTE(review): NH_safe_strcpy is assumed to copy at most the given
    // number of characters and then terminate -- confirm in NH_util.
    NH_safe_strcpy(gn, aGn, gnLen);
    NH_safe_strcpy(sn, aSn, snLen);

    // save a pointer to the parameters
    nameParms = nParms;

    // Do the pre-processing on the new name
    preprocessName(nParms->getNoiseChars(), nParms->getSegmentBreakChars());

    processTAQValues(nParms->taqTable);
}



// Construct a name from given-name, surname and middle-name strings.
// The middle name is appended to the given name, separated from it by a
// single space, and the combined given name is limited to NH_MAX_GN_LEN
// characters.
//
// Parameters:
//    nParms - comparison parameters; the pointer is saved in nameParms and
//             supplies the noise characters, segment-break characters and
//             TAQ table used to process the name.
//    aGn    - given name (NUL-terminated, must not be NULL).
//    aSn    - surname (NUL-terminated, must not be NULL).
//    aMn    - middle name (NUL-terminated, must not be NULL).
NHNameData::NHNameData(NHNameParms *nParms, char *aGn, char *aSn, char *aMn)
{
    int gnLen = strlen(aGn);
    int mnLen = strlen(aMn);
    int snLen = strlen(aSn);

    // space for the gn, mn and the implied space in between
    int gnAllocLen = gnLen + mnLen + 1;

    if (gnAllocLen > NH_MAX_GN_LEN)
        gnAllocLen = NH_MAX_GN_LEN;
    if (snLen > NH_MAX_NH_LEN)
        snLen = NH_MAX_NH_LEN;

    // allocate internal space for the gn and sn
    allocateNameStorage(gnAllocLen, snLen);

    NH_safe_strcpy(gn, aGn, gnAllocLen);
    NH_safe_strcpy(sn, aSn, snLen);

    // now append the middle name onto the gn, but
    // make sure we do not exceed the max allowed chars
    // which is currently the number of chars allowed for
    // GN.
    // We also append a space to the end of the gn so the
    // middle name is separated from it.
    if (gnLen < NH_MAX_GN_LEN) {    // make sure there is at least some room
        strcat(gn + gnLen, " ");
        NH_safe_strcpy(gn + gnLen + 1, aMn, gnAllocLen - (gnLen + 1));
    }

    // Things past here are things that need to be done for all constructors
    // so we may want to move them into a single function that each of the
    // constructors can call.

    // save a pointer to the parameters
    nameParms = nParms;

    // Do the pre-processing on the new name
    preprocessName(nParms->getNoiseChars(), nParms->getSegmentBreakChars());

    processTAQValues(nParms->taqTable);
}



// Create a name from a single string.  The caller passes in a
// NHNameFormat telling us the format.
// We break the string up into GN and SN.  We currently support
//      NH_SURNAME_COMMA_GIVENNAME,
//      NH_LAST_SEG_IS_SURNAME,
//      NH_NAME_FORMAT_UNKNOWN.
//
// For NH_SURNAME_COMMA_GIVENNAME, we place everything to the left of
// a comma in the surname, everything to the right in the given name,
// and remove the comma.  If there is no comma, the entire string
// goes into the given name.
//
// For NH_LAST_SEG_IS_SURNAME, the last segment becomes the surname,
// and all other segments go into the given name.  The process is smart
// enough to recognize TAQ values when determining the "last"
// segment, so that trailing TAQ values are not considered the last
// segment.  Instead, the last non-TAQ segment is treated as the
// last segment.  Since the surname should include prefixes and
// suffixes to the "last" segment, we must walk backwards from
// the "last" segment looking for prefixes (TAQ values that occur
// before the "last" segment, and are associated with the "last"
// segment).  Some examples below help clarify:
//
//   Ed Barker SR                      --> GN = Ed              SN = Barker SR
//   Maria De Lahosa Esquire           --> GN = Maria           SN = De Lahosa Esquire
//   Maria NIETA De Lahosa Esquire     --> GN = Maria NIETA     SN = De Lahosa Esquire
//   Maria consuala De Lahosa Esquire  --> GN = Maria consuala  SN = De Lahosa Esquire
//
// Example 1 is simple - SR is a qualifier (suffix), so it gets placed
// as part of the surname, along with Barker, which is the last
// real segment.
// Example 2 adds the idea of a prefix (De), which gets associated
// with the last real segment "Lahosa".  There is also a qualifier
// "Esquire", that gets associated with Lahosa.
// Example 3 differs from example 2 in that there is another TAQ
// value "NIETA", but it is a qualifier that appears before the
// "last" real segment, so it does NOT get associated with the
// surname.
// Example 4 is similar to example 3, except the "consuala"
// segment is not a TAQ value, so it gets associated with the
// given name (since by default we only use one segment
// for the surname).
//
// If a name is just one
// segment, it becomes the surname, and the given name is blank.
// Currently, NH_NAME_FORMAT_UNKNOWN is treated the same as
// NH_LAST_SEG_IS_SURNAME.

// Construct a name from a single string, splitting it into given name
// (gn) and surname (sn) according to nameFormat.
//
// Parameters:
//    nParms     - comparison parameters; the pointer is saved in nameParms
//                 and supplies the culture codes, TAQ table, noise
//                 characters and segment-break characters.
//    name       - the full name (NUL-terminated, must not be NULL).
//    nameFormat - NH_SURNAME_COMMA_GIVENNAME splits at a comma; any other
//                 value (NH_LAST_SEG_IS_SURNAME, NH_NAME_FORMAT_UNKNOWN)
//                 treats the last non-TAQ segment, plus any prefix/title
//                 TAQs directly before it and everything after it, as the
//                 surname.
NHNameData::NHNameData(NHNameParms *nParms, char *name, NHNameFormat nameFormat)
{
    // Need to split up the name here, so that the
    // last segment goes into the sn field, and all other
    // parts of the name go into the gn field.
    if (nameFormat == NH_SURNAME_COMMA_GIVENNAME) {
        char *firstComma;

        // NOTE(review): despite the variable name, strrchr locates the
        // LAST comma in the string -- confirm this is the intended split.
        firstComma = strrchr(name, ',');
        if (firstComma != NULL) {
            // found a comma, so copy everything up to there into the
            // sn field
            int snLen = firstComma - name;
            int gnLen = strlen(firstComma + 1);

            // make sure the strings are not too big
            if (gnLen > NH_MAX_GN_LEN)
                gnLen = NH_MAX_GN_LEN;
            if (snLen > NH_MAX_NH_LEN)
                snLen = NH_MAX_NH_LEN;

            // allocate space for names and segments
            allocateNameStorage(gnLen, snLen);

            // make sure the comma was not too far into the name string,
            // i.e. make sure we do not copy too many chars into the sn
            NH_safe_strcpy(sn, name, snLen);

            // copy everything past the comma into the given name
            NH_safe_strcpy(gn, firstComma + 1, gnLen);
            NH_strip(gn);
        }
        else {
            // no comma found, so put everything in the GN
            // and blank out the sn.
            int gnLen = strlen(name);

            if (gnLen > NH_MAX_GN_LEN)
                gnLen = NH_MAX_GN_LEN;
            allocateNameStorage(gnLen, 0);
            NH_safe_strcpy(gn, name, gnLen);

            *sn = EOS;
        }
    }
    else {
        // name format must be NH_LAST_SEG_IS_SURNAME or
        // NH_NAME_FORMAT_UNKNOWN

        // allocate for a worst case
        allocateNameStorage(NH_MAX_GN_LEN, NH_MAX_NH_LEN);

        char *lastSpace;

        // copy the entire string into the given name
        // and strip it.  We must strip it because we do not
        // want to find spaces that occur at the end of the name;
        // this would keep us from getting the last segment properly.
        NH_safe_strcpy(gn, name, NH_MAX_GN_LEN);
        NH_strip(gn);

        lastSpace = strrchr(gn, ' ');
        if (lastSpace != NULL) {
            char tempSegment[NH_MAX_NH_LEN + 1];
            int segLen;                                 // chars in the candidate segment
            char *segmentEndPtr = gn + strlen(gn) - 1;  // points to end of last segment
            char *lastRealSegmentStart = gn;            // assume we have all TAQ values,
                                                        // in which case we place them
                                                        // all into the SN field.
            NH_TAQRecordPtr tempTAQRecordPtr;           // pointer to structure for a TAQ record
            char *primaryCultureCode = nParms->primaryCultureCode;
            char *secondaryCultureCode = nParms->secondaryCultureCode;
            NHTAQTable *taqTable = nParms->taqTable;

            while (lastSpace != NULL) {
                // found a space, so see if this segment is a TAQ value.
                // Clamp the copy to the size of tempSegment: gn may hold a
                // single segment longer than NH_MAX_NH_LEN characters.
                segLen = segmentEndPtr - lastSpace;
                if (segLen > NH_MAX_NH_LEN)
                    segLen = NH_MAX_NH_LEN;
                NH_safe_strcpy(tempSegment, lastSpace + 1, segLen);
                strupr(tempSegment);

                // see if this segment is a TAQ value
                tempTAQRecordPtr = taqTable->getTAQSegment(tempSegment,
                                                           primaryCultureCode,
                                                           secondaryCultureCode);

                // if this value was not a TAQ value, then this segment
                // is the real last segment.
                if (tempTAQRecordPtr == NULL) {
                    lastRealSegmentStart = lastSpace + 1;
                    break;
                }

                // we can safely look at lastSpace - 1 because the entire
                // gn was stripped, so the leading character in the gn
                // could not be a space.  Thus, if we have a space, we are
                // not at the beginning of the string
                segmentEndPtr = lastSpace - 1;    // get ptr to end of prev segment
                lastSpace = NH_strrchr(gn, segmentEndPtr, ' ');
            }

            // when we are here, lastRealSegmentStart points to the start
            // of the last real segment.  However, there may be TAQ values
            // preceding the last real segment that are prefixes or titles,
            // in which case they should be associated with the surname.
            if (lastSpace != NULL) {
                segmentEndPtr = lastSpace - 1;    // get ptr to end of prev segment

                lastSpace = NH_strrchr(gn, segmentEndPtr, ' ');
                if (lastSpace == NULL)
                    lastSpace = gn - 1;           // make sure we check the first segment

                while (1) {                       // breaks get us out of this loop
                    segLen = segmentEndPtr - lastSpace;
                    if (segLen > NH_MAX_NH_LEN)
                        segLen = NH_MAX_NH_LEN;
                    NH_safe_strcpy(tempSegment, lastSpace + 1, segLen);
                    strupr(tempSegment);
                    tempTAQRecordPtr = taqTable->getTAQSegment(tempSegment,
                                                               primaryCultureCode,
                                                               secondaryCultureCode);

                    // if this value was not a TAQ value, then it should
                    // not be associated with the surname, so just break.
                    if (tempTAQRecordPtr == NULL) {
                        break;
                    }
                    else {
                        // this segment was a TAQ value.  If it is a prefix or a
                        // title, we should associate it with the surname.  Otherwise
                        // just break, so it gets placed with the given name
                        if ((tempTAQRecordPtr->taqType != 'P') &&
                            (tempTAQRecordPtr->taqType != 'T'))
                            break;
                        else
                            lastRealSegmentStart = lastSpace + 1;    // include this TAQ as part of sn
                    }

                    if (lastSpace == (gn - 1))
                        break;                    // already checked first segment
                    else {
                        segmentEndPtr = lastSpace - 1;    // get ptr to end of prev segment
                        lastSpace = NH_strrchr(gn, segmentEndPtr, ' ');
                        if (lastSpace == NULL)
                            lastSpace = gn - 1;   // make sure we check the first segment
                    }
                }
            }

            NH_safe_strcpy(sn, lastRealSegmentStart, NH_MAX_NH_LEN);
            *lastRealSegmentStart = EOS;    // terminate the GN
        }
        else {    // no space found in name, so put everything in surname field
            NH_safe_strcpy(sn, gn, NH_MAX_NH_LEN);
            *gn = EOS;
        }
    }

    // save a pointer to the parameters
    nameParms = nParms;

    // Do the pre-processing on the new name
    preprocessName(nParms->getNoiseChars(), nParms->getSegmentBreakChars());

    processTAQValues(nParms->taqTable);
}



// Construct an object from an archived representation in
// a stream.
//
// The archive is in the following order:
//
//      gnLen
//      snLen
//      nameStorage

// Construct a name from the archived representation read from inStream.
// The stream layout read here is: gnLen, snLen, the raw name storage,
// the gn segment count and segments, then the sn segment count and
// segments.  Each segment is stored as the offset of its segString, its
// TAQ count, its TAQs (offset, action, type) and its status.
//
// Parameters:
//    nParms   - comparison parameters; the pointer is saved in nameParms.
//    inStream - stream positioned at the start of an archived name.
//
// If either length cannot be read, nameStorage is set to NULL so the
// destructor does not try to free it.
//
// NOTE(review): the segment counts and numTAQs come straight from the
// stream and are not checked against the capacity of the segment/TAQ
// arrays -- confirm archives are trusted, or add validation.
NHNameData::NHNameData(NHNameParms *nParms, istream &inStream)
{
    short int gnLen;
    short int snLen;

    // save a pointer to the parameters
    nameParms = nParms;

    // read the given name len and surname len
    inStream.read((char *)&gnLen, sizeof(gnLen));
    if (inStream.gcount() == sizeof(gnLen)) {
        inStream.read((char *)&snLen, sizeof(snLen));
        if (inStream.gcount() == sizeof(snLen)) {
            // allocate space based on the name lengths
            allocateNameStorage(gnLen, snLen);

            // read the name data into the allocated storage.
            inStream.read(nameStorage, variableNameAllocSize);

            // read in the number of gn segments
            if (inStream)
                inStream.read((char *)&numGnSegments, sizeof(numGnSegments));

            if (inStream) {
                // read in the gn segments.  These have been stored in a special
                // format, with the offset of the segString as the first item
                unsigned short int segOffset;
                int i;    // shared by both segment loops below

                for (i = 0; i < numGnSegments; i++) {
                    // first, read in the offset (into the gnSegs area) of the segString
                    if (inStream)
                        inStream.read((char *)&segOffset, sizeof(unsigned short int));

                    if (inStream) {
                        // update the pointer; an offset of -1 marks an
                        // empty segment
                        if (segOffset == (unsigned short int)-1)
                            gnSegments[i].segString = "";
                        else
                            gnSegments[i].segString = gnSegString + segOffset;
                    }

                    // now, read in the number of TAQs
                    inStream.read((char *)&(gnSegments[i].numTAQs), sizeof(unsigned char));

                    if (inStream) {
                        // now read in the TAQs.  These are stored just like the segment,
                        // such that the leading element is the offset of the segString.
                        for (int j = 0; j < gnSegments[i].numTAQs; j++) {
                            // first, the offset
                            inStream.read((char *)&segOffset, sizeof(unsigned short int));
                            gnSegments[i].taqList[j].segString = gnSegString + segOffset;

                            // next, the TAQ action
                            inStream.read((char *)&(gnSegments[i].taqList[j].taqAction), sizeof(char));

                            // lastly, the TAQ type
                            inStream.read((char *)&(gnSegments[i].taqList[j].taqType), sizeof(char));
                        }
                    }

                    // lastly for the segment, the status
                    inStream.read((char *)&(gnSegments[i].status), sizeof(unsigned char));
                }

                // read in the number of sn segments
                inStream.read((char *)&numSnSegments, sizeof(numSnSegments));

                if (inStream) {
                    // do the same thing for the surname segments
                    for (i = 0; i < numSnSegments; i++) {
                        // first, read in the offset (into the snSegs area) of the segString
                        inStream.read((char *)&segOffset, sizeof(unsigned short));

                        if (inStream) {
                            // update the pointer
                            if (segOffset == (unsigned short int)-1)
                                snSegments[i].segString = "";
                            else
                                snSegments[i].segString = snSegString + segOffset;
                        }

                        // now, read in the number of TAQs
                        inStream.read((char *)&(snSegments[i].numTAQs), sizeof(unsigned char));

                        if (inStream) {
                            // now read in the TAQs.  These are stored just like the segment,
                            // such that the leading element is the offset of the segString.
                            for (int j = 0; j < snSegments[i].numTAQs; j++) {
                                // first, the offset
                                inStream.read((char *)&segOffset, sizeof(unsigned short));
                                snSegments[i].taqList[j].segString = snSegString + segOffset;

                                // next, the TAQ action
                                inStream.read((char *)&(snSegments[i].taqList[j].taqAction), sizeof(char));

                                // lastly, the TAQ type
                                inStream.read((char *)&(snSegments[i].taqList[j].taqType), sizeof(char));
                            }
                        }

                        // lastly for the segment, the status
                        inStream.read((char *)&(snSegments[i].status), sizeof(unsigned char));
                    }
                }
            }
        }
        else {
            // there was some sort of problem reading in the snLen
            // so set nameStorage to NULL so we don't try to free it.
            nameStorage = NULL;
        }
    }
    else {
        // there was some sort of problem reading in the gnLen
        // so set nameStorage to NULL so we don't try to free it.
        nameStorage = NULL;
    }
}

// Destructor: release the name storage, if any.  nameStorage is NULL
// when the object never received storage (e.g. a failed stream read),
// and free(NULL) is a no-op, so no guard is needed.
NHNameData::~NHNameData()
{
    free(nameStorage);
}



bool NHNameData : : archiveData (ostream &outStream) 
{ 

bool rc = true; 

// save the given name len and surname ien 

outSt ream. write ( (char *) SallocedGnLen, sizeof (allocedGnLen) ) ; 
outStream. write ( (char *) &allocedSnLen, sizeof (allocedSnLen) ) ; 

// save the actual name data 

outStream. write (nameStorage, variableNameAllocSize) ; 
// write out the number of gn segments 

outStream. write ( (char *) &numGnSegments , sizeof (numGnSegments ) ) ; 

// write out however many gn segments we need to 

// for each one, we first write out the offset (into the 

// gnSegs area) of the segString member. 

// Then, we write out the numTAQs , 

// then, the TAQs themselves 

// then, the status; 

unsigned short int segOffset; 

for (int i = 0; i < numGnSegments; i++) { 

// first, the segString offset. Check for a null 
segment, and code it 

// as -1. 

if (gnSegments [i] . segString [0] == EOS) 

segOffset = (unsigned short int)-l; 

else 

segOffset = (unsigned short 
int) (gnSegments [i] . segString - gnSegString); 

outStream. write ( (char * ) &segOf f set , sizeof ( unsigned short 

int) ) ; 

// next, number of TAQs 

outStream. write ( (char *) & (gnSegments [i] .numTAQs) , 
sizeof (unsigned char)); 

// next, the TAQs. We do a similar thing here, where we 

first write out 

// the offset of the taq's segString. 
for (int j = 0; j < gnSegments [i] . numTAQs; j++) { 
// first, the segString offset 
segOffset = (unsigned short 
int) (gnSegments [i] . taqList [j ] .segString - gnSegString); 

outStream. write ( (char * ) SsegOf f set , sizeof (unsigned 

short int) ) ; 

// next, the TAQ action 
outStream. write ( (char 
*) & (gnSegments [i] . taqList [ j ] . taqAction) , sizeof (char) ) ; 

// lastly, the TAQ type 
outStream. write ( (char 
*) & (gnSegments [i] . taqList [j ] . taqType) , sizeof (char) ) ; 
} 



// lastly for the segment, the status 
out St ream. write ( (char *) & (gnSegments [i] . status) , 
sizeof (unsigned char)); 
} 

// write out th-3 number of sn segments 

outStream. write ( (char * ) &numSnSegments , sizeof (numSnSegments ) ) ; 

// do the same thing for the sn segments 
for (i = 0; i < numSnSegments; i++) { 

// first, the segString offset 

if (snSegments [i] . segString [0] EOS) 

segOffset = (unsigned short int)— 1; 

else 

segOffset « (unsigned short 
int) (snSegments [i] .segString - snSegString); 

outStream. write ( (char * ) &segOf f set , sizeof (unsigned short' 

int) ); 

// next, number of TAQs 

outStream. write.( (char *) & (snSegments [i] . numTAQs ) , 
sizeof (unsigned char)); 

// next, the TAQs. We do a similar thing here, where we 
first write out 

// the offset of the taq's segString. 
for (int j = 0; j < snSegments [i ] .numTAQs; { 
// first, the segString offset 
segOffset = (unsigned short 
int) (snSegments [i] . taqList [j ] .segString - snSegString); 

outStream. write ( (char *) &segOf f set, sizeof ( unsigned 

short int ) ) ; 

// next, the TAQ action 
outStream. write ( (char 
*) & (snSegments [i] . taqList [j ] .taqAction) , sizeof (char) ) ; 

// lastly, the TAQ type 
outStream. write ( (char 
*) & (snSegments [i] .taqList [ j ] . taqType) , sizeof (char) ) ; 
} 

// lastly for the segment, the status 
outStream. write ( (char *) & (snSegments [i] . status) , 
sizeof (unsigned char) ) ; 
} 

return rc; 

} 



// go through the different name fields, and remove noise characters 
// Also, convert any segDelimChars to spaces 
// Also, split the name fields into segments 

void NHNameData: : preprocessName (char *noiseChars, char +segDelimChars ) 
{ 

char *inChar; 
char *outChar; 
int i; 



numGnSegments = 0; 
inChar = gn; 
outChar = gnSegString; 
* outChar = EOS; 

gnSegments [0] . segString = outChar; 
while ((*inChar != EOS) && (numGnSegments < 
NH_MAX_SEGSJ3EFORE_TAQ) ) { 

// if this is a noise character, just move on to the next 
one in the name 

if (strchr (noiseChars, *inChar) ) 

inChar++; 
else { 

if (strchr (segDelimChars, *inChar) ) ( 

// make sure this is not the next in a series 

of white spaces 

if (* (gnSegments [numGnSegments] .segString) != 

EOS) { 

// note that we know the segment." 
gnSegments [numGnSegments] .status = 



NH_NAME_F I E L D_S TAT U S_KNOWN ; 

the last segment 

segment 

number of segments 

NH MAX SEGS BEFORE TAQ) 



*outChar « EOS; // terminate 

numGnSegments++; //. look at next 
// make sure we are not past the max 
if (numGnSegments >= 



// 



break; 
inChar++; 

look at next char in name 

outChar++; 

to next available space in the output array 

gnSegments [numGnSegments] . segString 

outChar; 
segment 



// 

point 



*outChar 



EOS; 



// init the new 



} 

else 



so was the last one. 



ignore it, and move on 

> 

else { 
// 

segment we are 

// 



// this is a segDelim char, and 
inChar++; // so just 

just a regular character, so add it to the 



working on currently 
*outChar = toupper ( *inChar ) ; 
outChar++; // 
next character in segment next time. 

inChar++; 

at next char in name 



write to 
// look 



} 



> 



} 



// if we get here, it is because we reached the end of the gn 
string. 

// If we were in the middle of building a name segment, we 

should 

// terminate the segment and increase the number of segments we 



have 

if ((numGnSegments < NH_MAX_SEGS_BEFORE__TAQ) && 

( * CgnSegments [numGnSegments ] .segString) != 

EOS) ) { 

gnSegments [numGnSegments] .status = 
NH_NAME_FIELD_STATUS_KNOWN; 

*outChar = EOS; 
numGnSegments ++ ; // 

} 



// terminate the last segment 
look at next segment 



// now do the surname 
numSnSegments =0; 
inChar = sn; 
outChar = snSegString; 
• - *outChar = EOS; 

snSegments [0] . segString = outChar; 
while ((*inCha'r != EOS) && .(numSnSegments < 
NH__MAX_SEGS_BEFORE_TAQ) ) { 

// if this is a noise character, just move on to the next 
one in the name 

if (strchr (noiseChars, *inChar) ) 

inChar++; 
else { 

if (strchr (segDelimChars, *inChar) ) { 

// make sure this is not the next in a series 

of white spaces 

if {* (snSegments [numSnSegments] .segString) != 

EOS) { 

snSegments [numSnSegments ] .status = 

NH_NAME_FIELD_STATUS_KNOWN; 
the last segment 
segment 

number of segments 
NH MAX SEGS BEFORE TAQ) 



*outChar = EOS; // terminate 

numSnSegments++; // look at next 
// make sure we are not past the max 
if (numSnSegments >= 



// 



break; 
inChar++; 

look at next char in name 

outChar++; 

to next available space in the output array 

snSegments [numSnSegments] .segString = 

outChar; 
segment 



// 

point 



*outChar 



EOS; 



// init the new 



} 

else 



so was the last one. 



ignore it, and move on 

> 

else ( 



// 



// this is a segDelim char, and 
inChar++; // so just 

just a regular character, so add it to the 



segment we are 



// working on currently 
+ outChar = toupper ( *inChar ) ; 

outChar++; // write to 

next character in segment next time. 

inChar++; // look 



at next char in name 

} 

} 

} 

// if we get here, it ij* because we reached the end of the sn 
string. 

// If we were in the middle of building a name segment, we 

should 

// terminate the segment and increase the number of segments we 

have 

if ( (numSnSegments < NH_MAX _SEGS_BEFORE_TAQ) && 

( * (snSegments [numSnSegments ] .segString) != 

EOS)) { 

snSegments [numSnSegments ] .status = 
NH_NAME_FIELD_STATUS_KNOWN; 

*outChar = EOS; // terminate the last segment 

numSnSegments++; // look at next segment 

} 

// now see if there are any segments at all 

// in the fields. If* not, we should create a 

// single blank segment, and mark its status as 

// unknown. If there are segments, we need to check for the 

// special values NFN, NLN, NMN, FNU, LNU, MNU. If we find 

these, 

// blank out the segment, and set the status 
// appropriately. 

// When a name field has more than one segment, but still 

// specifies one of these values, we still blank it out, 

// but we keep the segment as a blank segment. Although the 

// digraph score for this segment will be largely determined by 

// the UNKNOWN or NONE parameter, it still gets treated as a 

// segment in that oops and anchor val can be applied, and 

// it still gets sent to best score. 

// We do not currently look across name fields for these 
markers . 

// That is, we look for NFN, NMN, FNU. MNU in the given name 

field 

// and we look for NLN and LNU in the surname field. 
// ??? Future versions may look across name fields. 

if (numGnSegments ==0) { 
numGnSegments = 1; 
gnSegments [0] . segString = ""; 

gnSegments [0] . status = N H_N AME_F I E L D__S TAT U S_UN KNOWN ; 

} 

else if (nameParms->getCheckGnUnknowns ( ) ) { 

for (i = 0; i < numGnSegments; i++) { 

if (! strcmp (gnSegments [i] . segString, "NFN")) { 
gnSegments [i] . segString [0] = EOS; 
gnSegments [i] .status = 
N H_N AME_F I E L D_S T ATUS_NON_EX I S T ANT ; 

} else if (! strcmp (gnSegments [i] . segString, 

"FNU") ) { 

gnSegments [i] . segString [0] = EOS; 
gnSegments [ i ]. status = 
NH_NAME_FIELD__STATUS_UNKNOWN; 

} else if ( ! strcmp (gnSegments [i J .segString, 

"NMN")){ 

gnSegments [i] . segString [0] = EOS; 
gnSegments [i ]. status = 



NH_NAME__FIELD_STATUS_NON__EXTSTANT; 

} else if ( ! strcmp (gnSegments [ i] . segString, 

"MNU" ) ) ( 

gnSegments [i] . segString [0] = EOS; 
gnSegments [i] . status = 
NH_NAME_FIELD_STATUS_UNKNOWN; 

} 

} 

} 

// now the sn segs 

if (numSnSegments == 0) { 

numSnSegments = 1; 

snSegments [0] . segString = ""; 

snSegments [0] .status - NH_NAME__FIELD_STATUS_UNKNOWN; 

} 

else if (nameParms->getCheckSnUnknowns ( ) ) { 

for (i = 0; i < numSnSegments; i++) { 

if ( ! strcmp (snSegments [i] .segString, "NLN") ) { 
snSegments [i] . segString [0] = EOS; 
snSegments [i] . status = 
N H_N AME_FT EL D_S TAT U S_NON_EX I S T ANT ; 

} else if (! strcmp { snSegments-f i ] . segString, 

"LNU" ) ) { 

snSegments [i] . segString [0] = EOS; 
snSegments [i] . status = 
NH_NAME_FIELD_STATUS_UNKNOWN; 

} 

} 

} 

} 



// Function to go through the segments and for each one, see if
// it is a TAQ value.  If so, we associate the TAQ with the previous
// or following segment, depending on its type (i.e. prefix, suffix, etc).
// When we store the TAQ, we also store the action associated with
// the TAQ (currently DELETE or DISREGARD), since this information
// will be required to determine how to adjust the base segment score.
//
// Deciding which segment to associate a TAQ with can get pretty
// hairy, especially when multiple TAQs can be in a name field
// consecutively.  We use the following rules for single TAQ values:
//
//      TAQ Type        Segment to Associate with
//
//      Prefix          next segment
//      Suffix          previous segment
//      Infix           Not supported yet
//      Title           next segment
//      Qualifier       previous segment
//
// These are the basic rules for figuring out which segment to associate
// TAQs with:
//
//  - Any TAQ segments before the first Name segment are associated with
//    the first Name segment.
//
//  - Any TAQ segments after the last Name segment are associated with
//    the last Name segment.
//
//  - For TAQs that are surrounded by Name segments:
//
//      - All TAQs between a Name segment (on the left) and a suffix (qualifier)
//        (on the right) are associated with the Name segment.
//
//      - All TAQs not fitting the above are associated with the Name segment
//        they precede.
//

void NHNameData : : processTAQValues (NHTAQTable *taqTable) 
{ 

// NHTAQAction taqAction; 

int i; 

NH_TAQRecordPtr . tempTAQList [ NH_MAX_TAQS_PER_SEGMENT ] ; 
// temp list of TAQs found 

int tempTAQSeg Index; // 

temp index for the tempTaqList 

NH_TAQRecordPtr ' tempTAQRecordPtr; // pointer to structure for 
a TAQ record 

int numTempTAQSegs ; 

// how many TAQs did we find 

int seglndex; 
// which segment are we looking, at 

int last Pre fixlndex; // 

index of last prefix like segment we got 

int lastSuf fixlndex; // 

index of last suffix like segment we got 

int lastNamelndex; 
// index of last non-TAQ segment we got 

int nameSegmentTaqList Index; 

// where to put taqs in a name segments taq list 

char *primaryCultureCode = 

nameParms->primaryCultureCode; 

char , *secondaryCultureCode = 

nameParms->secondaryCultureCode; 

// clear out the TAQ counts for each segment. 
// This is important because the TAQ segments are not 
initalized 

// if they are not filled in. 
for (i = 0; i < numGnSegments ; i++) 
gnSegments [i] . numTAQs = 0; 

if (nameParms->getSeparateGnTaqs ( ) true) { 
// init some variables 
seglndex = 0; 
numTempTAQSegs = 0; 



field, 



// Start out by looking for TAQs at the start of the name 
// before any name segments. 

// while there are TAQ values at the start of the gn 
// get their associated TAQ record and place that in 
// a temporary list. ' 
while (seglndex < numGnSegments) ( 



tempTAQRecordPtr = taqTable-' 
>getTAQSegment (gnSegments [seglndex] . segString, 



pr imaryCul tureCode , 



secondaryCultureCode) ; 

if (tempTAQRecordPtr != NULL) { 

// make sure we are not past our space for 

TAQs in the temp list 

// This would happen if a name field started 

out with tons of TAQs 

if (seglndex < NH_MAX_TAQS_PER_SEGMENT ) { 
tempTAQList [numTempTAQSegs] = 

tempTAQRecordPtr; 

n umT emp T AQ S e g s + + ; 

} 

seglndex++; 

.} 

else 

break; 

} 

// as long as we found a non-TAQ segment 
if (seglndex < numGnSegments) { 

// fill up the taqList for the first Name Segment 

with 

// each of the leading TAQs we found. If we found 

no TAQs above, 

// numTempTAQSegs will be 0, so we wont even enter 

into the loop. 

// Also, since we resticted the loop above, we are 

guaranteed to 

// not exceed our space for TAQs for a single 

segment . 

for (i - 0; i < numTempTAQSegs; i++) { 

gnSegments [seglndex] . taqList [i] .segString = 

gnSegments [i] .segString; 

gnSegments [seglndex] . taqList [i] . taqAction = 
tempTAQList [i ] ->gnAction; 

gnSegments [seglndex] . taqList [i] .taqType = 

tempTAQList [i] ->taqType; 

gnSegments [seglndex] . numTAQs += 1; 

} 

// now move all the segments back starting with 

first name segment 

// ousting the leading TAQs. If we found that the- 

first segment 

// was a name segment, we do not need to move 

anything. 

if (seglndex != 0) { 

for (i = seglndex; i < numGnSegments; 

i++) { 

gnSegments [i - seglndex] - gnSegment s [ i j ; 

} 

// note that we now have less segments, since 

we removed some segments 

// that were TAQ values 
numGnSegments -= seglndex; 



// 



now back at the begining 



also, set the seglndex to 0, since we are 



seglndex = 0; 



} 



// now start looking at the remaining segments 
// along the way, we must keep track of 
// - the index of the last Name segment 

we found (start out as 0, since we backed it up to 0) 

// - the index of the last "suffix-like" 

(starts out as -1, since all TAQs were tacked onto seg 



TAQ we found 
0) 

TAQ we found 
0) 



// - the index of the last "prefix-like" 

(starts out as -1, since all TAQs were" tacked onto seg 



// 
// 
// 
// 



If we get a: 
Name : . 



lastNamelndex + 1 and the 
// 

gnSegment [lastNamelndex] ; 

// 

the lastPref ixlndex and 
// 

segment . 

// 

the TAQ values from the gnSegment array 
// 

(lastNamelndex = seglndex; ) 

// 

many TAQs we ousted 

// 
// 

1 // 
// 
// 
// 

seglndex 

// 
// 



associate everything between the 

lastSuff ixlndex with 

associate everything -between 

seglndex - 1 with this name 

move everything back to oust 

mark the new lastNamelndex 

adjust numGnSegments for how 

"Suffix Like" 

lastPref ixlndex = - 
previous prefix now considered a suffix 
lastSuff ixlndex = seglndex 
"Prefix Like" 

lastPref ixlndex = 



End of Segments 

- associate everything between the 
lastNamelndex + 1 and seglndex 

// with gnSegment [lastNamelndex] ; 

// - adjust numGnSegments for how 

many TAQs we had at end 

// 

// Note that we do not do any storing of anything 
until we either reach the 

// end of the sements, or get a non-taq segment. 
// 

// Also, as we read TAQ segments, we store a 
pointer to their retrieved 

// structure in a list. We do this because we must 

read ahead before 

// we can store a TAQs relevant info (type, action) 
as being associated 

// with a segment, and we do not want to have to 
look up the TAQ info twice. 



numTempTAQSegs = 0; 
lastPref ixlndex = -1; 
lastSuf f ixlndex = -1; 
lastNamelndex = seglndex; 

seglndex++; // look at the next segment 

while (seglndex < numGnSegment 5, ) { 
tempTAQRecordPtr = taqTable- 
>getTAQSegment (gnSegments [seglndex] . segString, 

primaryCultureCode, 

secondaryCultureCode ) ; 

if (tempTAQRecordPtr. ==* NULL) ■{ ■ 
// segment is not a TAQ -value 

// do an initial check to make sure we 

actually "got one or more TAQs . 

// if not, all we really have to do is 

just reflect the new value for 

// lastNamelndex. 

if (numTempTAQSegs > 0) { 

// so associate all taqs between 

the previous Name segment and 

// the last suffix with the 
previous Name Segment. Since lastSuff ixlndex 

// may be -1 (if there we not 
suffixes), we may not even enter this for loop. 

// this variable is necessary 

because the segment at lastNamelndex 

// might already have TAQs stored 

in its taqList (due to prefixes) . 

// We must keep track of where 
the next available place in that list is. 

nameSegmentTaqListlndex = 

gnSegments [lastNamelndex] . numTAQs ; 

tempTAQSeglndex = 0; 

for (i = lastNamelndex + 1; (i <= 
lastSuf fixlndex) && (nameSegmentTaqListlndex < NH_MAX_TAQS_PER_SEGMENT ) ; 
i++) { 

gnSegments [lastNamelndex] .taqL 
ist [nameSegmentTaqListlndex] .segString = gnSegments [i] .segString; 

gnSegments [lastNamelndex] .taqL 
ist [nameSegmentTaqListlndex] .taqAction = tempTAQList [tempTAQSeglndex] - 
>gnAction; 

gnSegments [lastNamelndex] .taqL 
ist [nameSegmentTaqListlndex] .taqType = tempTAQList [ tempTAQSeglndex] - 
>taqType; 

tempTAQSegIndex++; 
nameSegmentTaqListIndex++; 
gnSegments [lastNamelndex] . numT 

AQs += 1; 

} 

// associate everything at or 

past the pre.vious prefix (s) with the name 

// segment we just found. Again, 

since there may not have been any 

// prefixes, we might not even 



enter this for loop 

if (lastPrefixIndex != -1) { 

for (i = lastPrefixIndex; (i < 
seglndex) && (tempTAQSeglndex < NH_MAX_TAQS_PER_SEGMENT) ; i++) { 

gnSegments [seglndex] . taq 
Listfi - lastPrefixIndex] . segString = gnSegments [i ]. segString; 

gnSegments [seglndex] .taq 
List[i - lastPrefixIndex] . taqAction = tempTAQList [ tempTAQS eg Index ] - 
>gnAction; 

gnSegments [seglndex] .taq 
List[i - lastPrefixIndex] . taqType = tempTAQList [ tempTAQSeglndex] - 



>taqType; 
TAQs += 1; 

starting with this segment and 
We move them back to the first 



tempTAQSegIndex++; 
gnSegments [seglndex] . num 



} 



) 

// 
// 
// 



now move all the segments back 
ending with the last segment, 
segment after the previous 



Name segment, which is numTempTAQSegs places 

for (i = seglndex; i < 



numGnSegments ; i++) 
= gnSegments [ i ] ; 

numGnSegments ; i++ ) 
numTempTAQSegs] ; 

numTempTAQSegs ; 



{ 



gnSegments [i - numTempTAQSegs] 



} 



//for (i = lastNamelndex + 1; i < 

{ 

// gnSegments [i] = gnSegments [i + 
//} 

numGnSegments -= 
// we not have less segments, since we got 



// 



rid of some TAQs 



numTempTAQSegs ; 
too 

0; 

the temp segment array 



seglndex; 
lastNamelndex 



seglndex -= 

// move our pointer back 



numTempTAQSegs = 



II 



clear out 



lastNamelndex = 

// mark the new 



} 

else { 

if ( (tempTAQRecordPtr->taqType == • P ' ) || 
(tempTAQRecordPtr->taqType == 1 T 1 ) ) { 

// got a prefix or a title 
tempTAQList [numTempTAQSegs] = 

tempTAQRecordPtr; 

numTempTAQSegs++; 

// only set the prefix index if 



we do not have one on record, 
the right most prefix in a string 



} 



// 



otherwise, we will only get 



// of consecutive prefixes, 
if (lastPref ixlndex == -1) 

lastPref ixlndex = seglndex; 



tempTAQRecordPtr; 
1; 

segment 

sure that any 
last name segment . 

one or more TAQs . 
the new value for 



// 



} 

// 
// 

// 
// 



else ( 

// must be a suffix or qualifier 
tempTAQList [numTempTAQSegs ) = 

numTempTAQSegs++; 
lastPref ixlndex = - 
any previous prefixes now considered a suffix 
lastSuff ixlndex = seglndex; 

} 

} 

seglndex++; // look at next 



now we are at the end of all segments, so make 
TAQs that were trailing get associated with the 

do an initial check to make sure we actually got 
if not, all we really have to do is just reflect 



// las tName Index . 

if (numTempTAQSegs > 0) { 

// associate all the stored taqs with the 

last name segment. 

// in the loop below: 

// i is the index into the gnSegments 

list for the TAQ string we are copying 

// tempTAQSeglndex is the index into 

the tempTAQList for the saved TAQ info 

// lastNamelndex is the index into the 

gnSegments for the name getting 

// the TAQs associated with it. 

// gnSegmentTaqListlndex is the index 

into the taqList for the name getting 

// the TAQs associated with it. 

// 

// We must be careful that we do not 
overwrite any TAQs already associated with 

// the name {from prefixes). For this 
reason, we use separate indexes for the 

// tempTAQList and the gnSegments' taqList. 

nameSegmentTaqListlndex = 
gnSegments [lastNamelndex] . numTAQs ; 

tempTAQSeglndex = 0; 

for (i = lastNamelndex + 1; (i < numGnSegments ) 
&& (nameSegmentTaqListlndex < NH_MAX_TAQS_PER_SEGMENT) ; i++) { 

gnSegments [lastNamelndex] . taqList [nameSegm 
entTaqListlndex] . segString = gnSegments ( i] . segString; 

gnSegments [lastNamelndex] . taqList [nameSegm 
entTaqListlndex] .taqAction = tempTAQList [ tempTAQSeglndex ] ->gnAct ion ; 



gnSegments [lastNamelndex] . taqList [nameSegm 
entTaqListlndex] .taqType = tempTAQList [tempTAQS eg Index] ->taqType; 

tempTAQSegIndex++ ; 

name Segment TaqList I ndex-i- + ; 

gnSegments [lastNamelndex] . numTAQs += 1; 

} 

// now we can just chop off all the TAQ 
segments by reducing numGnSegments . 

numGnSegments -= numTempTAQSegs; 

} 

} 

else { 

// we did not get any Non-TAQ segments . Move all 
the segments to the TAQ 

// list for the first segment, create a single 
segment, and set its string 

// . value to . 

gnSegments [0] .numTAQs = 0; // set this in case 
there were no TAQs {empty string) 

// In that case, we would not have 

cleared it out orignally 

for (i = 0; i < numTempTAQSegs; i++) { 
gnSegments [0] . taqList [i] . segString « 

gnSegments [i] .segString; 

gnSegments [0] . taqList [i] .taqAction = 
tempTAQList [i] ->gnAction; 

gnSegments [0] .taqListfi] .taqType = 

tempTAQList [i] ->taqType; 

gnSegments [0] .numTAQs += 1; 

} 

numGnSegments = 1; 
gnSegments [0] . segString = ""; 

gnSegments [0] .status = NH_NAME_FIELD_STATUS_UNKNOWN ; 

) 

) 

II as a last step, we must make sure that the number of 
gnSegments is 

// now no greater than NH_MAX__SEGS_AFTER_TAQ . We just ignore 
any segments 

// after the max. 

if (numGnSegments > N H_MAX_S E G S _A FT E R__T AQ ) 

numGnSegments = NH_MAX_SEGS_AFTER_TAQ; 

// clear out the TAQ counts for each segment. 
// This is important because the TAQ segments are not 
initalized 

// if they are not filled in. 
for (i = 0; i < numSnSegments ; i++) 
snSegments [i] . numTAQs = 0; 

// Now do the SN segments 

if (nameParms->getSeparateGnTaqs ( ) == true) { 
// init some variables 
seglndex = 0; 
numTempTAQSegs = 0; 



// Start out by looking- for TAQs" at the start of the name 

field, 

// before any name segments. 

// while there are TAQ values at the start of the sn 
// get their associated TAQ record and place that in 
// a temporary list, 
while (seglndex < numSnSegments ) { 
tempTAQRecordPtr = taqTable- 
>getTAQSegment ( snSegments [seglndex] . segString, 



primaryCultureCode, 



secondaryCultureCode) ; 

if (tempTAQRecordPtr != NULL) { 

// make sure we are not past our space for 

TAQs in the temp list 

// This would happen if a name field started 

out with tons of TAQs 

if (seglndex < NH_MAX__TAQS_PER_SEGMENT ) { 
tempTAQList [numTempTAQSegs] = 

tempTAQRecordPtr; 

numTempTAQSegs++; 

> 

seglndex++; 

} 

else 

break; 

} 

// as long as we found a non-TAQ segment 
if (seglndex < numSnSegments) { 

// fill up the taqList for the first Name Segment 

with 

// each of the leading TAQs we found. If we found 

no TAQs above, 

// numTempTAQSegs will be 0, so we wont even enter 

into the loop. 

// Also, since we resticted the loop above, we are 

guaranteed to 

// not exceed our space for TAQs for a single 

segment . 

for (i = 0; i < numTempTAQSegs; i++) { 

snSegments [seglndex] .taqList [i] .segString = 

snSegments [i] .segString; 

snSegments [seglndex] .taqList [i] . taqAction = 
tempTAQList [i] ->snAction; 

snSegments [seglndex] . taqList [i] . taqType = 

tempTAQList [i] ->taqType; 

snSegments [seglndex] . numTAQs += 1; 

} 

// now move all the segments back starting with 

first name segment 

// ousting the leading TAQs. If we found that the 

first segment 

// was a name segment, we do not need to move 

anything. 

if (seglndex != ,0) { 

for (i = seglndex; i < numSnSegments; 



} 

// 



snSegments [i - seglndex] = snSegments [ i ] ; 
note that we now have less segments, since 



we removed some segments 



// that were TAQ values 
numSnSegments -= seglndex; 



// 



also, set the seglndex to 0, since we are 



now back at the begining 



seglndex = 0; 



} 



// now start looking at the remaining segments 
// along the way, we must keep track of 
// - the index of the last Name segment 

we found (start out as 0, since we backed it up to 0) 

II - the index of the last "suffix-like" 

(starts out as -1, since all TAQs were tacked onto seg 



TAQ we found 
0) 

TAQ we found 
0) 



// - the index of the last "prefix-like" 

(starts out as -1, since all TAQs were tacked onto seg 



// 
// 
// 
// 



If we get a: 
Name : 



lastNamelndex + 1 and the 

// 

snSegment [lastNamelndex] ; 

// 

the lastPref ixlndex and 

// 

segment . 

// 

the TAQ values from the snSegment array 

// 

(lastNamelndex = seglndex;) 

// 

many TAQs we ousted 

// 
// 

1 // 

// 
// 
// 

seglndex 



associate everything between the 
lastSuff ixlndex with 
associate everything between 
seglndex - 1 with this name 
move everything back to oust 
mark the new lastNamelndex 
adjust numSnSegments for how 



"Suffix Like" 

lastPref ixlndex - - 
previous prefix now considered a suffix 
lastSuff ixlndex = seglndex 
"Prefix Like" 

lastPref ixlndex = 



// End of Segments 

// - associate everything between the 

lastNamelndex + 1 and seglndex 

// with snSegment [lastNamelndex] ; 

// - adjust numSnSegments for how 

many TAQs we had at end 

// 

// Note that we do not do any storing of anything 
until we either reach the 

// end of the sements, or get a non-taq segment. 

// 

// Also, as we read TAQ segments, we store a 
pointer to their retrieved 

// structure in a list. We do this because we must 



read ahead before " 

// we can store a TAQs relevant info (type, action) 
as being associated 

// with a segment, and we do not want to have to 
look up the TAQ info twice. 

numTempTAQSegs = 0; 
lastPref ixlndex = -1; 
lastSuf f ixlndex - -1; 
lastNamelndex = seglndex; 

seglndex++; ■ // look at the .next segment 

while (seglndex < numSnSegments ) { 
tempTAQRecordPtr = taqTable- 
>getTAQSegment ( snSegments [ seglndex] . segString, 

pr imaryCul tureCode , 

secondaryCultureCode) ; 

if (tempTAQRecordPtr == NULL) { 

// segment is not a TAQ value 

// do an initial check to make sure we 

actually got one or more TAQs. 

// if not, all we- really have to do is 

just reflect the new value for 

// lastNamelndex. 

if (numTempTAQSegs > 0) { 

// so associate all taqs between 

the previous Name segment and 

// the last suffix with the 
previous Name Segment. Since lastSuff ixlndex 

// may be -1 (if there we not 
suffixes), we may not even enter this for loop. 

// this variable is necessary 

because the segment at lastNamelndex 

// might already have TAQs stored 

in its taqList (due to prefixes) . 

// We must keep track of where 

the next available place in that list is. 

nameSegmentTaqListlndex = 

snSegments [lastNamelndex] . numTAQs ; 

tempTAQSeglndex = 0; 

for (i = lastNamelndex +1; (i <= 
lastSuffixIndex) && (nameSegmentTaqListlndex < NH_MAX_TAQS_PER_SEGMENT ) ; 
i + +) { 

snSegments [lastNamelndex] . taqL 

ist [nameSegmentTaqListlndex] .segString = snSegments [ i ] .segString; 

snSegments [lastNamelndex] .taqL 
ist [nameSegmentTaqListlndex] .taqAction = tempTAQList [ tempTAQSeglndex] - 
>snAction; 

snSegments [lastNamelndex] . taqL 
ist [nameSegmentTaqListlndex] . taqType = tempTAQList [tempTAQSeglndex] - 
>taqType; 

t empTAQSeg Index* + ; 
nameSegmentTaqListIndex++; 
snSegments [lastNamelndex] . numT 

AQs += 1; 



} 

// associate everything at or 
past the previous prefix (s) with the name 

// segment we just found. Again, 

since there may not have been any 

// prefixes, we might not even 

enter this for loop 

if (lastPref ixlndex != -1} ( 

for (i = lastPref ixlndex; (i < 
seglndex) && { tempTAQSeg Index < NH_MAX_TAQS_PER_SEGMENT) ; i++) { 

snSegments [seglndex] . taq 
Listfi - lastPref ixlndex] . segString = snSegments [i] . segString; 

snSegments [seglndex] .taq 
List[i - lastPref ixlndex] . taqAction = tempTAQList [ tempTAQSeglndex] - 
>snAction; 

snSegments [seglndex] .taq 
List[i - lastPref ixlndex] .taqType = tempTAQList [tempTAQSeglndex ] - 
>taqType; 

tempTAQSegIndex++; 
snSegments [seglndex] . num 



TAQs += 1; 



starting with this segment and 
We move them back to the first 



} 

// 
// 
// 



} 



now move, all the segments back 
ending with the last segment . 
segment after the previous 



Name segment, which is numTempTAQSegs places 

for (i = seglndex; i < 

numSnSegments ; i + + ) { 

snSegments [i - numTempTAQSegs] 

= snSegments [i] ; 

} 



numTempTAQSegs ; 



numSnSegments -= 
// we not have less segments, since we got 



// 



rid of some TAQs 



numTempTAQSegs ; 
too 

0; 

the temp segment array 



seglndex; 
lastNamelndex 



seglndex -= 

// move our pointer back 



numTempTAQSegs = 



// 



clear out 



} 

lastNamelndex = 

// mark the new 



} 

else { 

if ( (tempTAQRecordPtr->taqType == 'P') II 
(tempTAQRecordPtr->taqType == f T')) { 

// got a prefix or a title 
tempTAQList [numTempTAQSegs] = 

tempTAQRecordPtr; 

numTempTAQSegs++; 



we do not have one on record, 
the right most prefix in a string 



} 



else 



// only set the prefix index if 

// otherwise, we will only get 

// of consecutive prefixes, 
if (lastPref ixlndex == -1) 

lastPref ixlndex = seglndex; 



// must be a suffix or qualifier 
tempTAQList [numTempTAQSegs] = 



tempTAQRecordPtr; 

1; // 



segment 

sure that any 
last name segment 

one or more TAQs . 
the new value for 

last name segment, 



} 

// 
// 

// 
// 



numTempTAQSegs++; 
lastPref ixlndex = - 
any previous prefixes now considered a suffix 
lastSuff ixlndex = seglndex; 

} 

} 

seglndex++; // look at next 



now we are at the end of all segments, so make 
TAQs that were trailing get associated with the 

do an initial check to make sure we actually got 
if not, all we really have to do is just reflect 



// lastNamelndex. 

if (numTempTAQSegs > 0) { 

// associate all the stored tags with the 



// in the loop below: 
// i is the index into the snSegments 

list for the TAQ string we are copying 

// tempTAQSeglndex is the index into 

the tempTAQList for the saved TAQ info 

// lastNamelndex is the index into the 

snSegments for the name getting 

// the TAQs associated with it. 

// snSegmentTaqListlndex is the index 

into the taqList for the name getting 

// the TAQs associated with it. 

// 

// We must be careful that we do not 
overwrite any TAQs already associated with 

// the name (from prefixes) . For this 
reason, we use separate indexes for the 

// tempTAQList and the snSegments' taqList. 

nameSegmentTaqListlndex = 
snSegments [ lastNamelndex] . numTAQs ; 

tempTAQSeglndex = 0; 

for (i = lastNamelndex + 1; (i < numSnSegments ) 
&& (nameSegmentTaqListlndex < NH_MAX_TAQS_PER_SEGMENT} ; i++) { 

snSegments [lastNamelndex] . taqList [nameSegm 
entTaqListlndex] . segString - snSegments [i] . segString; 

snSegments [lastNamelndex] . taqList [nameSegm 



entTaqListlndex] .taqAction = tempTAQList [ tempTAQS eg Index] ->snAction; 

snSegments [iastNamelndex] . taqList [nameSegm 
entTaqListlndex] .taqType = tempTAQList [ tempTAQSeglndex] ->taqType ; 

tempTAQSeglndex-f + ; 

nameSegmentTaqListIndex++; 

snSegments [IastNamelndex] . numTAQs += 1; 

} 

// now we can just chop off ail the TAQ 
segments by reducing numSnSegments . 

numSnSegments -= numTempTAQSegs; 

} 

} 

else { 

// we did not get any Non-TAQ segments. Move all 

the segments to the' TAQ 

// list for the first segment, create a single 

segment, and set its string 

// value to "". 

snSegments [0] .numTAQs = 0; // set this in case 
there were no TAQs (empty string) 

.// In that case, we would not have 

cleared it out orignally 

for (i = 0; i < numTempTAQSegs; i++) { 
snSegments [0] .taqList [i] .segString = 

snSegments [i] .segString; 

snSegments [0] . taqList [i] .taqAction = 
tempTAQList [ i] ->snAction; 

snSegments [0] .taqList [i] .taqType = 

tempTAQList [i] ->taqType; 

snSegments [0] . numTAQs +- 1; 

} 

numSnSegments = 1; 
snSegments [0] .segString 
snSegments [0] .status = NH_NAME_FIELD_STATUS_UNKNOWN ; 

} 

} 

// as a last step, we must make sure that the number of 
gnSegments is 

// now no greater than NH_MAX_SEGS_AFTER_TAQ . We just ignore 
any segments 

// after the max. 

if (numSnSegments > NH_MAX_SEGS_AFTER_TAQ) 

numSnSegments = NH_MAX_SEGS_AFTER_TAQ; 

K 



_ M ii . 



// function to generate index keys for this name.
// Each key includes a portion for the GN and a portion
// for the SN.
// We currently support two key lengths, 32 bits or 64 bits.
// The GN length does not have to be the same as the SN length,
// but all GN keys generated must be the same length (similarly for
// SN). Thus the full key length could be:
//
//      64:  Both GN and SN are 32 bits
//      96:  GN is 64, but SN is 32
//      96:  GN is 32, but SN is 64
//     128:  Both GN and SN are 64 bits
//
// Keys are generated by name stem segment. The first key
// consists of a key for the first GN segment, and a key
// for the first SN segment. The second key
// consists of a key for the second GN segment, and a key
// for the second SN segment. When there are a differing number
// of GN and SN segments, the final segment of the name
// field with the fewer number of segments is repeated.
// Thus, the number of keys generated is given by the formula:
//
//     max(numGnSegs, numSnSegs)
//
// We do things this way so that a name has the same number of keys
// for both GN and SN, and in fact we can view the two keys as one
// contiguous key that can be passed to comparison functions as a
// single value.
//
// Note that we are talking about stem segments (TAQ segments have
// been removed).
//
// maxKeys specifies how many keys the caller can fit into
// keyBuff. It is up to the caller to make sure that they have allocated
// enough space in the keyBuff to hold maxKeys.

unsigned char • NHNameData : : genlndexKeys ( int maxKeys, NHKeyWidth 
gnKeyWidth, 

NHKeyWidth snKeyWidth, void *keyBuff) 

{ 

int numKeysGenerated = 0; 
int gnSeglndex = 0; 
int snSeglndex = 0; 

unsigned int *keyPtr = (unsigned int *) keyBuff ; 



while (numKeysGenerated < maxKeys) { 

if ( (gnSeglndex >= numGnSegments ) && (snSeglndex >= 
numSnSegments ) ) 

break; 
else { 

numKeysGenerated++ ; 

// make sure that if one segment is now at the end, 
// we stay on the last segment 
if (gnSeglndex == numGnSegments) 

gnSeglndex — ; 
if (snSeglndex == numSnSegments) 

snSeglndex--; 

if (gnKeyWidth == NH_KEY_WIDTH__32 ) { 
// gn key length is 32 
*keyPtr = 

globalDigraphBitmapArray . get32BitKeyForToken (gnSegments [gnSeglndex] . segS 
tring) ; 

keyPtr++; // move the pointer by 4 

bytes 

} 

else { 

// gn key length is 64 



globalDigraphBitmapArray . get 64BitKeyForToken (gnS 
egments [gnSeg*Index] . segString, 



(bit__64_t *) keyPtr) ; 

keyPtr += 2; // move the pointer 

by 8 bytes 

} 

if (snKeyWidth == NHJ(EY_WIDTH_32 ) { 
// gn key length is 32 
*keyPtr = 

globalDigraphBitmapArray. get32BitKeyForToken (snSegments [ snSeglndex] . segS 
tring) ; 

keyPtr++; // move the pointer by 4 

bytes 

} * 
else { 

// gn key length is 64 

globalDigraphBitmapArray . get 64 Bit Key ForToken (snS 
egments [snSeglndex] .segString, 

(bit_64_t *) keyPtr) ; 

keyPtr += 2; // move the pointer 

by 8 bytes 

} 

/ / advance the segment indexes 

snSegIndex++; 

gnSegIndex++ ; 

} 

} 

return numKeysGenerated; 

} 



// File: NHEvalNameData.cpp
//
// Description:
//
//     Implementation of the NHEvalNameData class.

// 

// 

// History:
//
//   5/14/97   EFB   Created
//   9/1/97    EFB   Lots of changes to support retaining segment scores in
//                   best mode so that sorting can be more detailed and accurate
//   10/31/97  EFB   Made several member functions protected, and made performComp()
//                   a friend of NHQueryNameData. Also changed performComp to
//                   NOT delete objects that are not passed on to the results list,
//                   to accommodate the new method of deleting NHEvalNameData objects.
//   11/03/97  EFB   Added a new function, calcNameScore(), and made it virtual.
//                   Removed virtual from performComp. The performComp method
//                   was too complicated to be subclassed. We really only want
//                   callers to be able to affect the name score and the determination
//                   of HIT/NO HIT. These are now the only virtual functions. Both
//                   are now inline in the header file so the caller knows exactly
//                   what is happening in these functions if they decide to subclass
//                   and override. OOPS, I forgot compareScore(), which is also
//                   virtual - we want them to be able to change how hits are sorted.
//
//   3/02/98   EFB   Made lots of changes necessary when I moved a bunch of
//                   parameters (the ones associated with parsing the name)
//                   from the NHCompParms class into a new class called NHNameParms,
//                   and renamed the NHCompParms class to NHCompParms.
//   3/20/98   EFB   Changed names to NH from SN



#include <string . h> 
#include <stdio.h> 
tinclude <stdlib.h> 



#include "NHEvalNameData . hpp" 

#include "NHQueryNameData . hpp" 

#include "NH_util . hpp" 

#include "NH_queens_arrays . hpp" 



ttinclude "NHVariantTable . hpp" 

iinclude "'NHResultsList . hpp" 

#include "NHTAQTable . hpp" 

tinclude "NHNameParms . hpp" 



// private, non-member function prototype 

static double NH_digraph_score (char *qSeg, int qSegLen, 

char *evalSeg, int evalSegLen, 

bool useLef tDigraphBias } ; 

static double NH_best_score ( int numQSegs, int numEvalSegs , 

NHSegScoreMode scoreMode, • 

double 

scores [NH_MAX_SEGS_AFTER_TAQ] [NH_MAX_SEGS_AFTER_TAQ] ) ; 

void NH_best_score_f or_highest_mdde ( int xDim, int yDim, double 
highestScore, 

double 

*bestSegScores, 

double 

scores [NH_MAX_SEGS__AFTER_TAQ] [NH_MAX_SEGS_AFTER_TAQ] ) ; 

static double NH_calc_score ( SegList qSegs, int numQSegs, 

SegLis 

t evalSegs, int numEvalSegs, 

SegLis 

tVariants querySegmentVariants, 

char 

*primaryCulture , 

char 

*secondaryCulture , 

NHComp 

Parms *compParms, 

NHName 

Parms *nameParms , 

. NHName 

Fields nameField, 

char 

* or igQName Field, 

char 

*origEvalNameField, 

int 

*numSegs Scored, 

double 

*bestSegScores ) ; 

static void NH_apply_TAQs_to_score (double MiScore, Segment *qSeg, 

Segment *evalSeg, 

double absDelTAQFactor, 
double absDisTAQFactor, 
double delTAQFactor, 
double disTAQFactor) ; 
static bool NH_check_compressed_name (char *qSegString, char 



+ evalSegString, 



char *compressCharsPart 1 , 
char *compressCharsPart2 ) ; 



// Constructs an eval name from separate GN and SN strings. The arguments
// are forwarded unchanged to the NHNameData constructor, after which the
// score members are reset.
NHEvalNameData::NHEvalNameData(NHNameParms *nParms, char *aGn, char *aSn) :
    NHNameData(nParms, aGn, aSn)
{
    resetScores();
}

// Constructs an eval name from separate GN, SN and MN strings
// (aMn is presumably the middle name - confirm against NHNameData).
// The arguments are forwarded unchanged to the NHNameData constructor,
// after which the score members are reset.
NHEvalNameData::NHEvalNameData(NHNameParms *nParms, char *aGn, char *aSn, char *aMn) :
    NHNameData(nParms, aGn, aSn, aMn)
{
    resetScores();
}



// Constructs an eval name from a single name string plus a format
// indicator. Both are forwarded unchanged to the NHNameData constructor,
// after which the score members are reset.
NHEvalNameData::NHEvalNameData(NHNameParms *nParms, char *name,
                               NHNameFormat nameFormat) :
    NHNameData(nParms, name, nameFormat)
{
    resetScores();
}



// construct an object from an archived representation in
// a stream.
//
// The archive is in the following order
//
//      gnLen
//      snLen
//      nameStorage

// Constructs an eval name from an archived representation in a stream.
// The NHNameData constructor consumes its own portion of the stream first;
// the score data is then read in the same order archiveData() writes it:
//
//      gnScore, snScore, nameScore
//      gnSegDifferential, snSegDifferential
//      numGnSegsScored, then that many doubles into gnSegScores
//      numSnSegsScored, then that many doubles into snSegScores
//
// Each read is attempted only while the stream is still good, so a short
// or failed stream stops the remaining reads.
NHEvalNameData::NHEvalNameData(NHNameParms *nParms, istream &inStream) :
    NHNameData(nParms, inStream)
{
    // Start from the same state the other constructors establish, so that
    // any member not reached because of a stream failure is not left
    // uninitialized.
    resetScores();

    // read the gn, sn and name scores
    if (inStream)
        inStream.read((char *)&gnScore, sizeof(gnScore));
    if (inStream)
        inStream.read((char *)&snScore, sizeof(snScore));
    if (inStream)
        inStream.read((char *)&nameScore, sizeof(nameScore));

    // seg differentials
    if (inStream)
        inStream.read((char *)&gnSegDifferential, sizeof(gnSegDifferential));
    if (inStream)
        inStream.read((char *)&snSegDifferential, sizeof(snSegDifferential));

    // read the number of gn segs scored, and however many scores we need
    // NOTE(review): the count comes straight from the stream and is not
    // checked against the capacity of gnSegScores - confirm archives are
    // trusted, or add a bound check here.
    if (inStream)
        inStream.read((char *)&numGnSegsScored, sizeof(numGnSegsScored));
    if (inStream) {
        if (numGnSegsScored > 0) {
            inStream.read((char *)gnSegScores, numGnSegsScored * sizeof(double));
        }
    }

    // read the number of sn segs scored, and however many scores we need
    // NOTE(review): same unchecked-count concern as for gnSegScores above.
    if (inStream)
        inStream.read((char *)&numSnSegsScored, sizeof(numSnSegsScored));
    if (inStream) {
        if (numSnSegsScored > 0) {
            inStream.read((char *)snSegScores, numSnSegsScored * sizeof(double));
        }
    }
}



// Destructor. Nothing to release at this level.
NHEvalNameData::~NHEvalNameData()
{
}

bool NHEvalNameData: : archiveData (ostream SoutStream) 
{ 

bool rc = true; 



rc = NHNameData: : archiveData (outStream) ; 

if (rc) { 

// read the gn, sn and name scores 
outStream. write ( (char *)&gnScore, sizeof (gnScore) ) ; 
outStream. write ( (char *) &snScore, sizeof (snScore) ) ; 
outStream. write ( (char *) &nameScore, sizeof (nameScore) ) ; 

// seg differentials 

outStream. write ( (char *) fcgnSegDif ferential, 
sizeof (gnSegDif f erential) ) ; 

outStream. write ( (char *) &snSegDiff erential , 
sizeof (snSegDif ferential) ) ; 

// read the number of gn segs scored, and however many 
scores we need inStream. read ( (char * ) SnumGnSegsScored, 
sizeof (numGnSegsScored) ) ; 

outStream. write ( (char *) &numGnSegsScored, 
sizeof (numGnSegsScored) ) ; 

if (numGnSegsScored > 0) { 

outStream. write ( (char *) gnSegScores, numGnSegsScored 

sizeof (double) ) ; 

} 



// read the number 
scores we nee'd 

out St ream. write ( (char 
sizeof (numSnSegsScored) ) ; 

if (numSnSegsScored > 
outStream. write 

sizeof (double ) ) ; 

} 

} 

return rc; 



of sn segs scored, and however many 
* ) ^numSnSegsScored, 
0} { 

((char * ) snSegScores, numSnSegsScored * 



// note that this function is a friend of NHQueryNameData, which is
// why we are able to access private member functions of that class.
// Scores this name against queryName one name field at a time and stores
// the results in gnScore and snScore. NH_calc_score also fills in
// numGnSegsScored / gnSegScores and numSnSegsScored / snSegScores through
// the pointers passed as its last two arguments.
//
// The query side supplies its segments, segment count, segment variants
// and original field text; this object supplies the eval side. Culture
// codes come from nameParms, and comparison settings from compParms.
inline void NHEvalNameData::calcComponentScores(NHQueryNameData *queryName)
{
    char *primaryCulture   = nameParms->primaryCultureCode;
    char *secondaryCulture = nameParms->secondaryCultureCode;

    // do the digraph compare and set the scores
    gnScore = NH_calc_score(queryName->gnSegments, queryName->numGnSegments,
                            gnSegments, numGnSegments,
                            queryName->gnSegmentVariants,
                            primaryCulture, secondaryCulture,
                            compParms,
                            nameParms,
                            NH_FIRST_NAME,
                            queryName->gn, gn,
                            &numGnSegsScored,
                            gnSegScores);

    snScore = NH_calc_score(queryName->snSegments, queryName->numSnSegments,
                            snSegments, numSnSegments,
                            queryName->snSegmentVariants,
                            primaryCulture, secondaryCulture,
                            compParms,
                            nameParms,
                            NH_LAST_NAME,
                            queryName->sn, sn,
                            &numSnSegsScored,
                            snSegScores);
}



// note that this function is a friend of NHQueryNameData, which is 
// why we are able to access private member functions of that class. 
// Compares this name against queryName using someCompParms.
//
// Steps:
//   1. remember the comparison parameters in compParms
//   2. compute the GN and SN component scores
//   3. compute the overall name score (calcNameScore)
//   4. record the absolute difference in segment counts for each field,
//      for use if scores tie
//   5. ask getCompResult() whether this is a match
//   6. if the query has a results list and this is a match, add this
//      object to the list
//
// Returns the value from getCompResult(), unless adding to the results
// list fails, in which case the code returned by addHit() is returned.
// This function does not delete the object in any case.
NHReturnCode NHEvalNameData::performComp(NHQueryNameData *queryName,
                                         NHCompParms *someCompParms)
{
    NHReturnCode  compResult;
    NHResultsList *resultList;

    // save the compParms so that they can be easily referenced
    // throughout the comparison process.
    compParms = someCompParms;

    calcComponentScores(queryName);

    // call a method to calculate the name score.
    calcNameScore();

    // store the segment differentials, in case we get a tie score.
    gnSegDifferential = abs(numGnSegments - queryName->getNumGnSegments());
    snSegDifferential = abs(numSnSegments - queryName->getNumSnSegments());

    // Now call the getCompResult() function to get the return value
    // (i.e. was it a match?)
    compResult = getCompResult();

    // now see if we are working with a results list
    resultList = queryName->getResultsList();
    if (resultList != NULL) {
        // we are using a result list. If this is a hit, add it
        // to the result list.
        if (compResult == NH_MATCH) {
            NHReturnCode tempInsertResult;

            // make sure the insert works. If so, don't touch
            // compResult, so the comparison will be returned
            // as a hit. If there was an error, save the error
            // code so it can be returned instead.
            tempInsertResult = resultList->addHit(this);
            if (tempInsertResult != NH_SUCCESS) {
                compResult = tempInsertResult;
            }
        }
    }

    return compResult;
}



// used only when the segment mode is set to HIGHEST.
// It compares the segment scores that were retained when
// the name was compared to the query name.
// We are comparing the segment scores for two (pre-scored)
// eval names. The comparison should find which name has
// the "best" set of segment scores, where best is defined
// as "the one with the highest best score". If the best
// score results in a tie, we move on to the second best score,
// and so on until we find a difference, or there are no more
// segments to compare. Each name has variables numGnSegsScored
// and numSnSegsScored, that tell how many segments were scored
// in the name. We do up to N comparisons, where N is the larger
// of the number of segments scored in each name. Where one name
// has fewer segments scored than the other, a default value of
// NH_DEFAULT_MISSING_SEGMENT_SCORE is assigned. This is so that
// a scored segment has to beat some threshold to be considered
// better than nothing at all.
//

double NHEvalNameData : : compareSegmentScores (NHEvalNameData 

*scoredName, NHNameFields nameField) 

{ 

double scoreDiff; 

int maxComparisons; 

double * thisEvalScores; 

double *compEvalScores; 

int numSegsScoredForThisEval ; 

int numSegsScoredForCompEval; 



if (nameField NH_LAST_NAME) { 
thisEvalScores = snSegScores; 
compEvalScores = scoredName->snSegScores ; 
numSegsScoredForThisEval = numSnSegsScored; 
. numSegsScoredForCompEval = scoredName->numSnSegsScored; 

} 

else { 

thisEvalScores = gnSegScores; 
compEvalScores = scoredName->gnSegScores ; 
numSegsScoredForThisEval = numGnSegsScored; 
numSegsScoredForCompEval = scoredName->numGnSegsScored; 

> 

maxComparisons = numSegsScoredForThisEval > 
numSegsScoredForCompEval ? numSegsScoredForThisEval : 
numSegsScoredForCompEval ; 

for (int i = 0; i < maxComparisons; i++) { 
if (i >= numSegsScoredForThisEval) 

thisEvalScores [i] = NH_DEFAULT_MISSING_SEGMENT_SCORE; 
else // we can do an else because only one segment 

can be missing, not both 

if (i >= numSegsScoredForCompEval) 
compEvalScores [ i ] = 
NH DEFAULT MISSING SEGMENT SCORE; 



scoreDiff = compEvalScores [ i ] - thisEvalScores [ i ] ; 
if (scoreDiff != 0) 



break; 



return scoreDiff; 



/**********************************************************************/
/* NH_calc_score

   Performs a string comparison on two name fields.

   Returns a value between 0.00 and 1.00, with 1.00 being an exact fit.
*/

double NH_calc_score ( SegList qSegs, int numQSegs, 
t evalSegs, int numEvalSegs, 
tVariants querySegment Variants, 

*primaryCulture, 



Parms *compParms , 

Parms *nameParms, 

Fields nameField, 

*origQNameField, 

*origEvalNameField, 

*numSegs Scored, 

*bestSegScores) 
{ 

NHAnchorSegMode 

NHSegScoreMode 

double 

double 

double 

double 

double 

bool 

double 

double 



re; 



^secondaryCulture, 



SegLis 

SegLis 

char 

char 

NHComp 

NHName 

NHName 

char 

char 

int 

double 



anchorSeg; 
scoreMode; 



bool 

// double 
bool 
double 
• double • 
double 
double 

TAQ] [NH_MAX_SEGS_AFTER_TAQ] ; // 
int 



oopsFactor ; 

absDelTAQFactor; 

absDisTAQFactor; 

delTAQFactor; 

disTAQFactor ; 

matchlnit ; 

initScore; 

initialOnlnitialMatchSco 

checkVariant ; 

variantScore; 
lef tDigraphBias ; 
anchorFactor ; 
nameUnknownScore ; 
noNameScore; 
scoresTable [NH_MAX_SEGS_AFTER_ 
scores for segment pairs 
qlndex; 



// 



temp index for query segments 



j_ nt evallndex; // 

temp index for eval segments 

int qSegLen; 
// hold string length of query segment 

j_ nt evalSegLen; / / 

hold string length of eval segment 

double diScore = 

0.0; // temp score for single pair comparison 

double hiScore = 

0.0; // temp score to hold best score as we iterate, 

// which lets us avoid 

best score in mode=BEST 

bool are Variants ; 

// temp flag to hold if the pair are variants 

double returnValue = 0.0; 

NHVariantTable *variantTable ; 

double varScore; 

NHVarld evalSegVarld 

bool scoreTaqs; 

double compressedNameScore; 

fr QO l checkCompressedName; 

// set some paramters based on the name field 
if (nameField == N H_L AS T__N AME ) { 

anchorSeg = compParms->getSnAnchorSegmentMode ( ) ; 
scoreMode = compParms->getSnSegmentScoreMode ( ) ; 
oopsFactor = compParms->getSnOOPSFactor ( ) ; 
matchlnit = compParms->getMatchSnIntial { ) ; 
initScore = compParms->getSnInitialScore ( ) ; 

initialOnlnitialMatchScore = compParms- 
>getSnInitialOnInitialMatchScore ( ) ; 

checkVariant - compParms->getCJseSnVariants { ) ; 

anchorFactor = compParms->getSnAnchorFactor ( ) ; 

leftDigraphBias = compParms->getUseSnLef tBias { ) ; 

nameUnknownScore = compParms->getLNUScore ( ) ; 

noNameScore = compParms->getNLNScore { ) ; 

scoreTaqs = compParms->getScoreSnTAQs ( ) ; 

absDelTAQFactor = compParms->getAbsDelSnTAQFactor ( ) ; 

absDisTAQFactor = compParms->getAbsDisSnTAQFactor ( ) ; 

delTAQFactor = compParms->getDelSnTAQFactor ( ) ; . 

disTAQFactor = compParms->getDisSnTAQFactor ( ) ; 

compressedNameScore = compParms->getSnCompressedNameScore ( ) 

checkCompressedName = compParms->getCheckSnCompressedName ( ) 

variantTable = nameParms->snVariantTable ; 

} 

else { 

anchorSeg = compParms->getGnAnchorSegmentMode { ) ; 
scoreMode = compParms->getGnSegmentScoreMode ( ) ; 
oopsFactor = compParms->getGnOOPSFactor ( ) ; 
matchlnit = compParms->getMatchGnIntial ( ) ; 
initScore = compParms->getGnInitialScore ( ) ; 

initialOnlnitialMatchScore = compParms- 
>getGnInitialOnInitialMatchScore ( ) ; 

checkVariant = compParms->getUseGnVariants ( ) ; 

anchorFactor = compParms->getGnAnchor Factor () ; 

leftDigraphBias = compParms->getUseGnLef tBias {) ; 

nameUnknownScore = compParms->getFNUScore ( ) ; 

noNameScore = compParms->getNFNScore ( ) ; 



scoreTaqs = compParms->getScoreGnTAQs ( ) ; 
absDelTAQFactor = compParms->getAbsDelGnTAQFactor ( ) ; 
absDisTAQFactor = compParms->getAbsDisGnTAQFactor { ) ; 
delTAQFactor = compParms->getDelGnTAQFactor ( } ; 
disTAQFactor = compParms->getDisGnTAQFactor ( ) ; 
compressedNameScore = compParms->getGnCompressedNameScore { ) ; 
checkCompressedName = compParms->getCheckGnCompressedName ( ) ; 
variantTable = nameParms->gnVariantTable; 



} 



// clear out the scores table 
for (qlndex = 0; qlndex < NH_MAX_SEGS_AFTER_TAQ; ++qlndex) 

for (evallndex = 0; evallndex < NH_MAX_SEGS_AFTER_TAQ; ++evallndex) 
scoresTable [qlndex] [evallndex] = 0.0; 

// now go through each possible combination of segment pairs 
// (created by matching a query segment against an eval 
segment ) . 

// Store the scores in the scoresTable. 
for (qlndex = 0; qlndex < numQSegs; ++qlndex) { 

qSegLen = strlen (qSegs [qlndex] . segString) ; 

for (evallndex "= 0; evallndex < numEvalSegs; ++evallndex) {" 
evalSegLen = strlen (evalSegs [evallndex] . segString) ; 

// first check for either the query or eval segment 



being 



// blank. 

if ( (qSegLen ==0) |i (evalSegLen =-0)) { 

// We make a distinction between "unknown' 
and "none". The table below shows the 



scores 
Known - K, 



U 
N 



unknownScore 



// 

// 

// 
// 
// 

// 
// 
// 



we assign for the various combinations of 
Unknown - U, and None -N . 



I 



K 



N/A 



NoneScore 



e + 1) / 2 | 



// U [ 
(unknownScore +1) 12 
// 



unknownScore | 



(unknownScor 



wnScore + 1) / 2 



// N | NoneScore 

(NoneScore + 1} / 2 

// 



(unkno 



if (qSegs [qlndex] . status == 
NH_NAME_FIELD_STATUS_KNOWN) { 

// we should not need to check for both 
being known * 



if (evalSegs [evallndex] . status == 

NH_NAME_FIELD_STATUS_UNKNOWN) 

diScore = nameUnknownScore; 
else // must be 
NH_NAME_FIELD_STATUS_NON_EXISTANT 

diScore = noNameScore; 

} 

else if (qSegs [qlndex] . status ===== 
NH_NAME_FIELD_STATUS_UNKNOWN) { 

if (evalSegs [evallndex] . status == 

NH_NAME_FI ELD_STATUS_KNOWN ) 

diScore = nameUnknownScore; 
else if (evalSegs [evallndex] . status — 

NHJSIAME_FIELD_STATUS_UNKNOWN) 

diScore = (nameUnknownScore + 1.0) / 

2.0; 

else // must be 
NH_NAME_FIELD_STATUS_NON_EXISTANT, same score as 

// above, but we 

repeat it in case wecange behavior later 

diScore = {nameUnknownScore +1.0) / 

2.0; 

} 

else { // query must be 

N H_NAME_FI ELD_STATUS__NON_EX I ST ANT ) 

if (evalSegs [evallndex] . status == 

NH_NAME_FI ELD_STATUS_KNOWN ) 

diScore = noNameScore; 
else if (evalSegs [evallndex] . status == 

NH_NAME_FI ELD_STATUS_UNKNOWN ) 

diScore = (nameUnknownScore + 1.0) / 

2.0; 

else // must be 
NH_NAME_FI EL D_S T AT US_NON_EX I S T ANT , same score as 

// above, but we 

repeat it in case we cange behavior later 

diScore = (noNameScore + 1.0) / 2.0; 

} 

} 

else { 

// check the variants if 

// - we are supposed to 

// - we have a list of variants to 

check 

// - there is a variant for this 

segment of the query 

// Note we must check the secondary 

variants if the 

// primary check. does not find a 

variant . 

areVariants = false; 

if (checkVariant && (querySegmentVariants != 

NULL) && 

(querySegmentVariants [qlndex] ! = 

NULL) ) { 

// so see if the eval name segment has 
any variants in the variant table 

evalSegVarld = variantTable- 
>getVariant IdForName (evalSegs [evallndex] . segString) ; 

if (evalSegVarld != 

NH VAR NOT FOUND) { 



// yes, it did have some 
variants, so see if there is an intersection 

varScore = 

querySegmentVariants [qlndex] - 

>getVariantScoreForIdAndCuiture (evalSegVarld, primaryCulture ) ; 

if (varScore ! = 

NH_VARIANTS_NOT_RELATED) { 

areVariants = true; 
diScore = varScore; 

> 

else { 

// variants were not 

related, so check for the secondary 

// variant, source 

// Put a check in here to 

see if the primary culture 

// code was 
NH_CULTURE_CODE_GENERIC. If so, we can skip this check 

// since the secondary code 

is always generic 

if {strcmp (primaryCulture, 

NH_CULTURE_CODE_GENERIC) ) { 

varScore = 

querySegmentVariants [qlndex] - 

>getVariantScoreForIdAndCulture (evalSegVarld, secondaryCulture ) ; 

if (varScore != 

NH_VARI ANTS_NOT_RELATED ) { 

areVariants = 
diScore = 

} 

} 

} 

} 

} 

// now, if we did not find variants above, 

// do we have an initial and are we supposed to 

if (areVariants == false) { 

if (matchlnit && (qSegLen == 1 I I 



true; 
varScore; 



check for intials 
check them? 

evalSegLen ==1)) { 



// does the first char match ? 
if (qSegs [qlndex] . segString [0] == 
evalSegs [eval Index] . segString [0] ) { 

// if the second char 
matches, we have an initial on inital match, 

// since we know the length 

of atleast one of them is 1 . 

if (qSegs [qlndex] . segString [ 1 ] 

== evalSegs [evallndex] . segString [ 1 ] ) 

diScore = 

initialOnlnitialMatchScore; 

else // initial 

match, but one was more than a single character 

diScore = 

initScore; // so assign initScore 

} 

else 



0.0; 



diScore = 

// no match at all, since first char was off 



} 

else { 



or we shouldn't check them 

have unknowns, variants, or initials, 

comparison. 

diScore : 

NH_digraph_score {qSegs [qlndex] . segString, qSegLen, 
evalSegs [evallndex] .segString, evalSegLen, 



// else not initials 
// when here, we do not 
// so do a digraph 



} 

// 



lef tDigraphBias) ; 

// end, if (areVariants == false) 
end, else, both segs are known 



} 

(neither name is blank) 
/* 

segment parameters . 
when the segments 

multiply matches that 
segment other than 
not get applied in 
that was 
the segment that 

was not in the anchor segment would be 
penalized. Anchor Factor 

is meant more to provide a penalty when a 

(relatively) 

unimportant segment is used as the sole 

contributor to 

the score. 



Here we need to handle the oops and anchor 

oops specifies a factor to multiply by the score 

are not in the same position. 
AnchorSeg, AnchorFactor specify a factor to 

are in the same segment position, but are in a 

the stated AnchorSeg. Note that AnchorSeg does 

average mode, because otherwise a 2 segment name 

an exact match would get less than 1.0, since 



applied, since oops only 
and anchorFactor 

alignment. anchorSeg 
the left, while 
right. A value 
left (this is the 

*/ 



Note that only one of the factors may be 
gets applied to segments that are out of place, 
only gets applied to matches that are in place. 
AnchorSeg is also used to determine segment 
value 1 indicates segments should be lined up on 
value 2 indicates they should be lined up on the 
of 0 indicates they should be lined up on the 
default . 



switch (anchorSeg) { 
case 0 



anchor segment designation 
place, so apply oops 



if {qlnclex != evallndex) 

diScore * = oopsFactor; 

break; 



case 1 

segment is most important 
evallndex) // 



// 
// 

// 



no 

out of 
first 



NH_SEGMODE_AVG ) ) 
anchorFactor ; 

NH_SEGMODE_AVG 

== numEvalSegs - 1)) 
end segments 



if (qlndex 
out of place, so apply oops 

diScore *= oopsFactor; 

else 

if ((qlndex != 0) && (scoreMode !« 
// if not the first segment (anchor seg) 
diScore *= 

// apply the anchorFactor, so long as the 
break; 

// scoreMode is not 

case 2 : /* If not last-to-last" match. . . */ 

if ((qlndex == numQSegs - 1) && (evallndex 



// no modification, since both are 
see if they are in the same 



else { 
// 

position, counting back from the end 

if ( (numQSegs - qlndex) == 

(numEvalSegs - evallndex) ) 

if (scoreMode != 

NH_SEGMODE_AVG) // skip anchor factor in average seg mode 

diScore *= 

anchorFactor; // apply the anchorFactor 

else 

diScore *= oopsFactor; 

> 

break; 



// Now we need to apply the TAQ values to the 

score, 

// but only if they wanted to, and we have a score 
// greater than 0 (otherwise, factors have no 

effect) . 

if ((scoreTaqs) && (diScore > 0.0)) 

NH_apply_TAQs_to_score (&diScore, &qSegs [qlndex] , 

&evalSegs [evallndex] , 

absDelTAQFactor, absDisTAQ Factor, 

delTAQFactor, disTAQFactor ) ; 

if (numQSegs > numEvalSegs) // always store 

smaller dimension as rows 

scoresTable [evallndex] [qlndex] = diScore; 

else 

scoresTable [qlndex] [evallndex] = diScore; 



hiScpre = hiScore > diScore ? hiScore : diScore; 



} // for evallndex 
} // for qlndex 

// now figure out a composite score from all the best .scores 
// Note that for Best score, we must set the number of segments 
// that were scored, and fill an array containing those scores 

// these will be used later to sort hits) * 
// The exception to this is when either the query or the 
// eval name field has just 1 segment, in which case we only 
// score. one segment, which becomes the score (in all modes), 
if ( (numEvalSegs ~ 1) I! (numQSegs == 1)) { 
if (scoreMode == NH_SEGMODE_HIGHEST) ( 

*numSegsScored. = 1; // . note that we only 

scored 1 segment 

bestSegScores [0] = hiScore; // save the 

singly scored segment 
} 

returnValue = hiScore; 

} 

else { 

// both have more than 1 segment 

if (numQSegs > numEvalSegs) { // always call 

functions with smaller dimension as rows 

if (scoreMode == NH_SEGMODE_HIGHEST ) { 

NH_best_score_f or_highest_mode ( numEvalSegs , 
numQSegs, hiScore, bestSegScores, scoresTable); 

*numSegsScored = numEvalSegs; // note 

that we only scored numEvalSegs segments 

returnValue = hiScore; 

} 

else 

returnValue - NH_best_score (numEvalSegs, 
numQSegs, scoreMode, scoresTable); 
} 

else { 

if (scoreMode =« NH_SEGMODE_HIGHEST) { 

NH_best_score_for_highest_mode (numQSegs, 

numEvalSegs, hiScore, bestSegScores, scoresTable) ; 

*numSegsScored - numQSegs; // note 

that we only scored numQSegs segments 

returnValue = hiScore; 

} 

else s 

returnValue = NH__best_score (numQSegs , 
numEvalSegs, scoreMode, scoresTable) ; 
} 

) 

// here we need to see if we are suppoed to check compressed 

names . 

// if so, we have to call the NH_check_compressed_name ( ) 
function. 

// If that function returns true, we pick the higher of the 
// compressedScore (which is a parameter) and the current 
returnValue . 



if (checkCompressedName && - 

NH_check_compressed_name ( origQNameField, 

origEvalNameField, 

nameParms->getSegmentBreakChars ( ) , 

nameParms->getNoiseChars ( ) ) ) 
returnValue = returnValue > compressedNameScore ? 
returnValue : compressedNameScore; 

return returnValue; 

} /* NH_calc_score */ 

/* NH_is_compress_char

   True when c appears in either of the two compress-character strings.
   Callers must not pass the string terminator: strchr() would "find" it
   at the end of the list.
*/
static bool NH_is_compress_char(char c, const char *compressCharsPart1,
                                const char *compressCharsPart2)
{
    return strchr(compressCharsPart1, c) != NULL ||
           strchr(compressCharsPart2, c) != NULL;
}

/* NH_check_compressed_name

   Compresses both names passed in, and sees if they are exact matches.

   The compression is implemented by skipping every character that appears
   in compressCharsPart1 or compressCharsPart2.  The comparison of what
   remains ignores case, since the names are not necessarily upper cased
   at this point.

   The two names are walked in step and compared as we go, so no
   intermediate copies are built.  Names and character lists of any
   length are therefore safe; fixed-size scratch buffers could be
   overrun by a whole name field or a long pair of character lists.

   Returns true when the compressed names are equal, false otherwise.
   Two names that compress to nothing are considered equal.
*/
bool NH_check_compressed_name(char *qSegString, char *evalSegString,
                              char *compressCharsPart1,
                              char *compressCharsPart2)
{
    const char *q = qSegString;
    const char *e = evalSegString;

    for (;;) {
        // step over the characters that compression removes from the query
        while (*q != '\0' &&
               NH_is_compress_char(*q, compressCharsPart1, compressCharsPart2))
            q++;

        // and the same for the eval name
        while (*e != '\0' &&
               NH_is_compress_char(*e, compressCharsPart1, compressCharsPart2))
            e++;

        // if either name ran out, they match only if both ran out together
        if (*q == '\0' || *e == '\0')
            return *q == '\0' && *e == '\0';

        // compare one character, ignoring case, using the same library
        // case folding that a strcasecmp() of the whole strings would use
        if (strncasecmp(q, e, 1) != 0)
            return false;

        q++;
        e++;
    }
} /* NH_check_compressed_name */



/* NH_best_score 

From a matrix of scores compute the highest possible 

combination 

of scores. During the evaluation of the matrix, a given row 

or 

column must provide one and only one score. 

We use a mode to determine how we calculate a score. The 

mode 

can be either N H_S EGMO DE_AVG or NH_SEGMODE_LOWEST . Both 

modes 

start out by selecting the combination of values {with no 
row or . 

column being used more than once) that gives the highest 

sum. Then, 

for mode = NH_SEGMODE_AVG, the final score is the average of 

all 

these scores. For NH_SEGMODE_LOWEST, it is the worst of 
these scores. 

If the matrix is non-square {x <> y) , our final score 

calculation 

only includes N values, where N is the lesser dimension. We 

still 

use all the possible squares in the matrix to perform our 

selection, 

but the final score does not consider parr of the matrix. 
To perform the work, we figure out which type of matrix we 

are 

dealing with (the dimensions) . We use that to select an 
array that contains 

the column indexes for each valid combination of segments 

(where 

valid means no column participates twice) . 

Our matrix always comes either as a square, or as a fat, 
short matrix. 

That is, the number of rows is always less than or equal to 
the number of 

columns. This way, we do not have to specify as many 
combination arrays, 

since we only have to cover a 2 X 3 array, and not a 3 X 2. 

Also, before this function, we see if either name has just 1 
segment, in which case we use the best score. 

*/ 

double NH_best_score (int xDim, int yDim, NHSegScoreMode scoreMode, 

double 

scores [NH_MAX_SEGS_AFTER_TAQ] [NH_MAX_SEGS_AFTER_TAQ] ) 
{ 

byte *comboIndexesPtr; // points to array that 

holds valid column index combos 

int numCominations ; 

switch (xDim) { 
case 2: 

switch (yDim) ( 

case 2: // 2 by 2 

comboIndexesPtr = twoByTwo; 



numCominations = 2; 
break; 

case 3: // 2 by 3 

comboIndexesPtr = twoByThree; 

numCominations = 6; 

break; 

case 4 : // 2 by 4 

comboIndexesPtr = twoByFour; 
numCominations = 12; 
break; 

case 5: // 2 by 5 

comboIndexesPtr = twoByFive; 
numCominations - 20; ' 
breaks- 
default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = twoByFive; 
numCominations = 20; 
break; 

} 

break; 
case 3: 

switch (yDim) { 

case 3: // 3 by 3 

comboIndexesPtr = threeByThree; 

numCominations =6; 

break; 

case 4 : // 3 by 4 

comboIndexesPtr = threeByFour; 

numCominations = 24; 

breaks- 
case 5 : // 3 by 5 

comboIndexesPtr = threeByFive; 

numCominations = 60; 

breaks- 
default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = threeByFive; 

numCominations = 60; 

break; 

} 

breaks- 
case 4 : 

switch (yDim) { 

case 4 : // 4 by 4 

comboIndexesPtr = fourByFour; 
numCominations = 24; 
breaks- 
case 5: // 4 by 5 

comboIndexesPtr = fourByFive; 
numCominations - 120; 
break; 

default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = fourByFive; 
numCominations = 120; 
break; 

} 

break; 
case 5 : 

switch (yDim) { 



so -just use first five 



case 5: /"/ 5 by 5 

comboIndexesPtr = fiveByFive; 
numCominations = 120; 
break; 

default: // must be greater than 5, 

comboIndexesPtr = fiveByFive; 
numCominations = 120 ; 
break; 



breaks- 
default: // must be greater than 5, so just use 



first five 

is <= yDim, we do not have to 
3, etc 



// also, since xDim 
// handle 5X2, 5 X 



comboIndexesPtr = fiveByFive; 
numCominations = 120 ; 
break; 

} 

// we always use xDim matrix cells to compute the score, since 

it 

// is the smaller of the dimensions. We go through each 
combination 

// and evaluate the scores found in the scores array for the 
// particular combination of indexes. 

// Each evaluation must consider xDim values, so each pass 
through the 

// loop collects xDim values. 

// The values from the comboIndexesPtr array are the column 
indexes . 

// numCominations is the number of times we iterate through the 
loop to 

// look at a combination of elements in the score matrix. 
// 

// For example: 

// if I have a 2 X 3 matrix, I need to find the best valid 2 
segment 

// combination (since 2 is xDim) . There are 6 possible 
combinations, 

// and the column values are stored as pairs in the twoByThree 

array. 

// The row values are implicitly 0 and 1 for each pair, so I 

end up 

// checking: 

// scores [0] [twoByThree [0] ] + 

scores [1] [twoByThree [1] ] 

// scores [0] [twoByThree [2] ] + 

scores [1] [twoByThree [3] ] 

// 

scores [1] [twoByThree [5 ] ] 
// 

scores [1] [twoByThree [7] ] 
// 

scores [1] [ twoByThree [ 9 ] ] 

// scores [0] [twoByThree [10] ] + 

scores [1] [ twoByThree [ 1 1 ] ] ; 

// 

double tempScoreTotal ; 



scores [0] [twoByThree [4 ] ] + 
scores [0] [twoByThree [6] ] + 
scores [0] [twoByThree [8 ] ] + 



double • tempLowScore; 

double tempVal; 

double highestTotal = 0.0; 

double bestLowScore = 0.0; 

int comboArraylndex = 0; 

int i, row; 

for (i = 0; i < numCominations ; i++) { 
tempScoreTotal = 0.0; 
tempLowScore = 1.0; 

for (row = 0;" row < xDim; row++) { 
// get a single score 
tempVal = 

scores [row] [comboIndexesPtr [comboArraylndex] ] ; 

// now see if score is the low score for this combo 
if (tempVal < tempLowScore) 

tempLowScore = tempVal; 



combination 



// include this cell in the total for this 
tempScoreTotal +- tempVal; 

// look at next item in the combination (or the 

next combination) 

comboArrayIndex++; 
// see . if the low score is better than our previous low 

score 

if (tempLowScore > bestLowScore) 

bestLowScore = tempLowScore; 
// see if this score is higher than our previous highest 
if (tempScoreTotal > highestTotal) 

highestTotal = tempScoreTotal; 

} 

if (scoreMode == NH_SEGMODE_AVG) 
return highestTotal / xDim; 

else 

return bestLowScore; 

} 



/ * NH_best_score_f or_highest_mode 

This is a special version of NH_best_score . For a complete 
description of how the combination stuff works, see the 

comments 

for NH_best_score . 

We made this a separate function because: 

it has to return (by reference) an array of 



scores. The other 
return is 
did not 

NH_best_score by passing 
statements . 



modes only have to return a score for the name. 
The way we figure out which array of scores to 

much more involved than NH_best_score . 
Since we only do this stuff in highest mode, we 

want to slow down the processing of 

extra parameters and adding lots of "if" 



score 



This function was added so that we can figure out which 
combination of segments gives us the highest scores, with 
the highest score being most important, the next highest 

being the second most important, etc. Note that this is 
different from average score, where we are looking for the 
highest sum of scores. In that case, the higest score is no 
more important that the lowest score. In fact, the 

chosen in average mode might not even include the single 

segment score . 

To achieve our goal, we evaluate each possible combination 
index pairings. Each combination gives us an array of N 
where n is the smaller dimension in the matrix-. 
We sort each combination so that the highest score, appears 
in the array, and so on. If this is the first combination 
have evaluated, it becomes the one to beat, so we fill up 
array of scores we were passed by reference .with this array 
scores. We then go through the rest of the combinations 
for an array that beats the current one to beat. To beat 
we walk through the array, we compare the scores from each 
If they are equal, we move on to the next one. Otherwise, 
higher score wins . 

To help speed things up, we get passed in the high score, 
is the high score of the entire matrix (note this high score 
appear more than once) . We use this high score to quickly 
combinations as not being possible contenders. If, after 
a contender array, the first item is not the high score we 
passed, this combination could not possibly be the one, so 
bother copying all the array elements? 

Note that we check before entering this function to make 

both dimensions are bigger than 1. And we make sure that 
xdim is the smaller of the dimensions (or they are equal) . 



double 



double 

scores [NH_MAX_SEGS_AFTER_TAQ] [NH_MAX_SEGS_AFTER__TAQ] ) 
{ 

byte ''comboIndexesPtr; // points to array that 

holds valid column index combos 

int numCominat ions ; 

switch (xDim) { 
case 2: 

switch {yDim) { 

case 2: // 2 by 2 

comboIndexesPtr = twoByTwo; 
numCominat ions = 2; 
break; 

case 3: // 2 by 3 

comboIndexesPtr = twoByThree; 
numCominations =6; ( 
break; 

case 4 : // 2 by 4 

comboIndexesPtr = twoByFour; 
numCominations =12; 
break; 

case 5: // 2 by 5 

comboIndexesPtr = twoByFive; 
numCominations = 20; 
break; 

default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = twoByFive; 
numCominations - 20; 
break; 

} 

break; 
case 3: 

switch (yDim) { ' 

case 3: // 3 by 3 

comboIndexesPtr = threeByThree ; 

numCominations = 6; 

break; 

case 4: // 3 by 4 

comboIndexesPtr = threeByFour; 

numCominations = 24; 

break; 

case 5: // 3 by 5 

comboIndexesPtr = threeByFive; 

numCominations = 60; 

break; 

default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = threeByFive; 

numCominations = 60; 

break; 

} 

break; 
case 4 : 

switch (yDim) { 

case 4: // 4 by 4 

comboIndexesPtr = fourByFour; 

numCominations = 24; 

break; 

case 5: // 4 by 5 



comboIndexesPtr = fourByFive; 
riumCominations = 120; 
break; 

default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = fourByFive; 
numCominations = 120; 
break; 

} 

break; 
case 5: 

switch (yDim) { 

case 5: // 5 by 5 

comboIndexesPtr = fiveByFive; 
numCominations =120; 
break; 

default: // must be greater than 5, 

so just use first five 

comboIndexesPtr = fiveByFive; 
numCominations = 120; 
break; 

} 

break; 

default: // must be greater than 5, so just use 

first five 

// also, since xDim 

is <= yDim, we do not have to 

// handle 5X2, 5 X 

3, etc 

comboIndexesPtr = fiveByFive; 
numCominations = 120; 
break; 

> 

// we always use xDim matrix cells to compute the score, since 

it 

// is the smaller of the dimensions. We go through each 
combination 

// .and evaluate the scores found in the scores array for the 
// particular combination of indexes. 

// Each evaluation must consider xDim values, so each pass 
through the 

// loop collects xDim values. 

// The values from the comboIndexesPtr array are the column 
indexes . 

// numCominations is the number of times we iterate through the 
loop to 

// look at a combination of elements in the score matrix. 
// 

// For example: 

// if I have a 2 X 3 matrix, I need to find the best valid 2 
segment 

// combination {since 2 is xDim) . There are 6 possible 
combinations , 

// and the column values are stored as pairs in the twoByThree 

array . 

// The row values are implicitly 0 and 1 for each pair, so I 

end up 

// checking: 

// scores [0] [twoByThree [0] ] + 

scores [1] [twoByThree [ 1 ] ] ; 



// 




scores [0] [twoByThree [ 2] ] 


+ 


scores [1] [twoByThree [3] ] ; 






// 




scores [0] [twoByThree [ 4 ] ] 


+ 


scores [1] [twoByThree [5] ] ; 






// 




scores [0] [twoByThree [ 6] ] 


+ 


scores [1] [twoByThree [7 ] ] ; 






// 




scores [0] [ twoByThree [ 8 ] ] 


+ 


scores [1] [twoByThree [ 9] ] ; 






// 




scores[0] [ twoByThree [ 10 ] 


] + 


scores [1] [twoByThree [ 1 1 ] ] ; 






// 








double 


tempSegScores [NH 


MAX SEGS AFTER TAQ] ; 




int 


comboArraylndex = 0; 




int 


i, row; 






bool 


includesHighestScore; 




double 


swapVal; 






int 


templndex; 






double 


compVal; 






int 


numChariges ; 






double 


tempVal; 







// init the temp seg scores array to zeros, so that the first 
// comparison will fail. 

for (templndex = 0; templndex < xDim; templndex++) { 
bestSegScores [templndex] = 0; 

} 

for {i - 0; i < numComi nations ; i++) { 

includesHighestScore = false; // assume this combo does 

not 

// include the best score 
for (row = 0; row < xDim; row++) { 
// get a single score 
tempVal ~ 

scores [row] [comboIndexesPtr [comboArraylndex] ] ; 

// now see if score is the low score for this combo 
if (tempVal == highestScore) 

includesHighestScore = true; 

// save this value as part of our temp array of 

scores 

// that we will sort below 
tempSegScores [row] = tempVal; 

// look at next item in the combination (or the 

next combination) 

comboArrayIndex++; 

} 

// see if this combo includes the best score. If so, 

sort it 

// and then compare it to the current numbers in 
bestSegScores . 

if (includesHighestScore == true) { 

// sort the numbers in bestSegScores 
while (1) { 

numChanges = 0; 

for (templndex = 1; templndex < xDim; 

templndex++) { 



if (tempSegScores [templndex - 1] < 
tempSegScores [templndex] ) { 

swapVal = tempSegScores [templndex - 

1]; 

tempSegScores [templndex - 1] = 

tempSegScores [templndex] ; 

tempSegScores [templndex] = swapVal; 
numChanges++; 

} 

} 

if (numChanges == 0) 
break; 

} 

// now compare these temp scores to the current 

best scores 

for (templndex = 0; templndex < xDim; 
templndex++) { 

compVal = tempSegScores [templndex] - 
bestSegScores [templndex] ; 

if (compVai > 0) { . 

// temp scores are better, so replace 



the best scores with them 
templndex++) { 
tempSegScores [templndex] ; 



} 

else 



break out 



for (templndex = 0; templndex < xDim; 
bestSegScores [templndex] = 

} 

break; 

if (compVal < 0) { 

// current scores are better, so 

break; 

} 

// otherwise, just continue the loop. 



/* digraph_score 

This is the core of the name-check algorithm. 

A value from 0.0 to 1.0 is calculated based on the number of 
digraphs which match between the two given strings. 
A bias can be used so that digraphs on the right end of the 
strings count less than those on the left. 

Notes: 

The routine ensures that a digraph can only participate in 
match once. 

Each match results in two points being added to the total, 
final score is the total number of points divided by the 
of digraphs that could have matched. 



The 

number 



The bias works by discounting the score we award for a 
match. As we move into the segment, we subtract 0.1 from 
match score . 

The weight table is used to adjust the divisor (which is 
the total number of digraphs that could have matched) . In 
of bias, we need to decrease that number. Otherwise, an 
would not return a. 1.0, since we would only be deducting 



digraph 
the 

normally 
the case 
exact match 
from the 

score (the numerator), and not the divisor. The weight 
table factors 

correspond to the score that would be assigned to an exact 

match for 

each possible length. In other words, we start at 1, then 

add .9, then 

add .8, etc. (the same pattern we use to deduct from the 

match score) 
*/ 

double NH_digraph__score (char *qSeg, int qSegLen, 

char *evalSeg, int evalSegLen, 

bool useLef tDigraphBias ) 

{ 

char tempDigraphStr [2 + 1] ; // storage for a digraph string 

// terminate the temp digraph string once 
tempDigraphStr [2] = EOS; 

// These are the weights a name has when using a biased 
// (left-to-right) calculation. They end up being used as the 
denominator 

// for the final score calculation 

static const double NH_dig_bias_weights [NH__MAX_SEG_LENGTH + 2] 

= { 1.0, 1.0, 1.9, 2.7, 3.4, 4.0, 4.5, 4.9, 5.2, 5.4, 5.5, 

5.6, 5.7, 

5.8, 5.9, 6.0, 6.1, 6.2, 6.3, 6.4, 

6.5, 6.6, 6.7, 6.8, 6.9, 7.0, 

7.1, 7.2, 7.3, 7.4, 7.5, 7.6}; 

// an array of 1 Y 1 or 1 N ' values, one for each possible digraph 

// position in the eval segment. Each starts out at 1 N 1 and 

gets 

// to 'Y' when (and if) it gets used. 

// Note that we must add 1 because we normally pad the name 

with 

// spaces. 

char alreadyMatched [NH_MAX_SEG_LENGTH + 1]; // max digraphs = 
NAME_SIZE + 1 

// Forget all previous matches. 

memset (alreadyMatched, 1 N 1 , sizeof alreadyMatched); 

// Now count the number of elements involved in matching. 

double qBiasFactor - 0.9; // 0.9 because 



of leading digraph check 

double evalBiasFactor = 0.9; // see note below 

double matchPoints; 
char * evalSegString; 



// start out by checking the first character, which is a 
special 

// case. It forms an implied digraph of " X" (space, followed 

by 

// the character. Thus, if both the query and eval have the 

same 

// first character, we give them 2 match points. 
' // Also, since we really start our loop with the second 
digraph, 

// we set the bias factors, to 0.9 rather than 1.0 
if (qSeg[0] == evalSeg[0]) { 
matchPoints = 2.0; 

} 

else 

matchPoints = 0.0; 

for (int querylndex = 0; querylndex < qSegLen - 1; ++query Index ) 
/* see if this digraph occurs in database name */ 
tempDigraphStr [0] = qSeg [querylndex] ; 
tempDigraphStr [1] = qSeg [querylndex + 1] ; 
evalSegString = evalSeg; 
if (useLeftDigraphBias) { 

// bring down the query bias by 0.1 each time, 
until we get to 0 . 1 

if ((querylndex > 0) && (querylndex < 10)) 
qBiasFactor -=0.1; 

} 

do { 

evalSegString = strstr (evalSegString, tempDigraphStr) 
if (evalSegString != NULL) { 

int evalMatchOffset = evalSegString - evalSeg; 

if (alreadyMatched[evalMatchOffset] 1 N 1 ) { 
alreadyMatched [evalMatchOffset] = ' Y 1 ; 
if (useLeftDigraphBias) { /* decrement 
eval match-bias, minimum 0.10 */ 



(evalMatchOffset + 1); 



evalBiasFactor; 



evalBiasFactor = 1.0 - 0.1 * 

if (evalBiasFactor < 0.1) 

evalBiasFactor = 0.1; 
matchPoints += qBiasFactor + 



} 

else 

break; 
} 



matchPoints += 2.0; 



else 



evalSegString++ ; 

) 

) while (evalSegString != NULL) ; 



} 



// now do a check for the "hidden" digraph at the end of the 
segment 



// tp account for the non-existant trailing space 
if (qSeg[qSegLen - 1] — evalSeg [evalSegLen - 1]) { 
if (useLeftDigraphBias) f 

evalBiasFactor = 1.0 - 0.1 * evalSegLen; 
if (evalBiasFactor < 0.1) 

evalBiasFactor =0.1; 
// don't forget to bring down the query bias by 0 . 1 

also, 

// unless we are at 0.1 

if ( (querylndex > 0) && (querylndex < 10)) 

qBiasFactor -= 0.1; 
matchPoints += qBiasFactor + evalBiasFactor; 

} 

else 

matchPoints += 2.0; 

} 

// The return value is the number of elements involved in matching 
// compared to the total number of elements, 
return useLeftDigraphBias 

? matchPoints / 

(NH_dig_bias_weights [qSegLen + 1] + NH_dig_bias_weights [evalSegLen + 1)) 
~ . : matchPoints / (qSegLen + evalSegLen + 

2); 

} /* NH_digraph_score */ 



/* 

This function adjusts the diScore (which already has some value) 

based 

on the TAQ values that are attached to the two segments passed in. 

In the NameHunter system, TAQs .are broken up into two types 
(disregard and 

delete). In general, disregard TAQs (e.g. "Jr.") contain more 
meaningful information than delete TAQs (e.g. "Mr."), and thus 
disregard TAQs are considered more important when 
evaluating/comparing 

TAQs between segments. 

There are three factors involved in modifying the score. These 

are 

delete factor 
disregard factor 
absent factor 

When applied, a factor is multiplied by the existing score. 
However, 

deciding which factor (if any) to apply is somewhat complex, 
especially 

when one or both of the segments have multiple TAQ values. For 

this 

reason, we describe the multi-TAQ situation separately. 

For situations where both segments have either 0 or 1 TAQ values, 

we 

use the following matrix to choose a factor to apply: 



I No 

TAQ t Delete TAQ i ■ Disregard 

TAQ I 

I , | 

No TAQ | No Change | Absent 

Factor I Absent Factor I 



I | | 

Delete TAQ | ' Absent I Delete 

Factor I Absent Factor I 

| Factor 

I Unless 
same I 
. I 

I | i— 

Disregard TAQ i Absent I Disregard 

Factor | Absent 

Factor I 

I 

| Factor 

I Unless 
same I 

I I 

I ( | 



For the multiple case, we use the algorithm below. A general word 
about the alg - we are treating disregard as more important than 
delete, so we start out by checking for disregards. All it takes 
is for one disregard value in each of the segments to match to 
avoid applying the disregard factor. The same goes for deletes. 
If we have any dis values in one segment, but none in the other, 
we apply the absent factor. 

Assuming segments SI and S2: 

Look- for dis segments in SI 
if found 

if same segment found in S2 

go on to delete processing 

else 

if no dis segments in S2 
apply absent value 
else, continue looking for dis segments in 

SI that match S2 

if we get to end of SI segments and still 

have not found a 

matching dis in S2, apply dis factor, 
else (no dis found in SI) 
look for dis in S2 
if found 

apply absent 



else 



go on to delete processing 



Delete processing: 

look for deletes in SI 
if found 

if same seg found in S2 
do nothing 

else 

if no deletes in 32 
apply absent 

else 

continue to look for deletes in SI. 



If we get to end if 
deletes that match a 



SI segments and do not find any 
delete in S2, .apply delete factor 



else {not deletes found in SI) 
look for deletes in S2 
if delete found 

apply absent 

else 

do nothing. 

V 

void NH_apply_TAQs_to_score {double *diScore, Segment *qSeg, Segment 
*evalSeg, 

double absDelTAQFactor, 
double absDisTAQFactor, 
double delTAQFactor, 
double disTAQFactor) 

{ 

int numQTAQs = qSeg->numTAQs ; 

int numEvalTAQs = evalSeg->numTAQs ; 

double applyFactor =1.0; 

// handle the simple case first 

if ({numQTAQs <= 1) && (numEvalTAQs <= 1) ) { 
switch (numQTAQs) { 

case 0: 

if (numEvalTAQs ==1) { 

if (evalSeg->taqList [0] . taqAction == 

NH_TAQ_ACTION_DELETE ) 

applyFactor = absDelTAQFactor; 

else 

applyFactor = absDisTAQFactor; 

} 

break; 
case 1: 

if (numEvalTAQs ==1) { 

// both segs have 1 TAQ value, so 

// figure out the type of TAQs involv 

if (qSeg->taqList [0] . taqAction == 

NH TAQ_ACTION_DELETE) { 

if (evalSeg->taqList [0] . taqAction 



NH_TAQ_ACTION__DELETE)" { 

// same action, so see if 

string are the same 

if (strcmp (qSeg- 

>taqList[0] .segString, 

evalSeg->taqList [0] .segString) ) 

applyFactor = 

delTAQFactor; // they were different, so apply delete 

factor 

} 

else // not the same 

action, so do the absent 

applyFactor = absDisTAQFactor; 

} 

else { // not 

NH_TAQ_ACTION_DELETE, so must be 

// ' disreg 

ard 

if (evalSeg->taqList [0] . taqAction — 

NH_TAQ_ACT I ON_D I S REGARD ) { 

// same action, so see if 

string are the same 

if (strcmp (qSeg- 

>taqList[0] .segString, 

evalSeg->taqList [0] . segString) ) 

applyFactor = 

disTAQFactor; // they were different, so apply dis 

factor 

} 

else // not the same 

action, so do the absent dis 

applyFactor = 

absDisTAQFactor; // since dis takes precidence of del 

} 

} 

else { // query had 1 TAQ, but eval had 

none 

if (qSeg->taqList [0] .taqAction == 

NH_TAQ_ACTION_DELETE) 

applyFactor = absDelTAQFactor ; 

else 

applyFactor = absDisTAQFactor; 

} 

break; 

} 

1 

else { 

// one (or both) of the segments has more than 1 TAQ 

value 

// First see if either has no TAQ segments. In this 

case, 

// we can apply the absent factor and skip the ugly 

processing 

// below 

if' (numQTAQs == 0) { 

// assume the abs del factor, but look for a DIS in 

the 

// eval. If we find one, set the applyFactor to 



the abs dis 

// since that should take precidence 
applyFactor = absDelTAQFactor; 

for (int evallndex = 0; evallndex < numEvalTAQs; 
evallndex++) { 

if {evalSeg->taqList [evallndex] . taqAction == 
NHJTAQ_ACTION_DISREGARD) f 

applyFactor = absDisTAQFactor; 
break; 

} 

I 

} 

else if (numEvalTAQs == 0) { 

// assume the abs del factor, but look for a DIS in 

the 

// query. If we find one, set the applyFactor to 

the abs dis 

// since that should take precidence 

applyFactor = absDelTAQFactor; 

for (int qlndex = 0; qlndex < numQTAQs; 

qlndex++) { 

if (qSeg->taqList [qlndex] . taqAction == 
N H_T AQ__AC T 1 0N_D I S REG AR D ) { 

applyFactor = absDisTAQFactor; 
break; 

} 

} 

} 

else { 

// one segment has 2 or more TAQs, and the other 



has one or more 



satified the 



satified the 



satified the 



bool satisfiedDis = true; // we assume we have 

// dis processing until we find 

// a dis value, since if neither 

// seg has a dis value, we do not 

// apply the dis value 
bool satisfiedDel = true; // we assume we have 

// del processing until we find 

// a del value, since if neither 

// seg has a del value, we do not 

// apply the del value 
bool satisfiedAbs = true; // we assume we have 



// abs processing, 
bool foundMatchingDis = false; 
bool f oundMatchingDel = false; 



int i, j; 



// go through the query segment, looking for dis 

segments 

for (i = 0; i < numQTAQs; i++) I 

if (qSeg->taqList [i] . taqAction == 
NH TAQ_ACTION_DISREGARD) f 

~ // since we found a dis, we must find a 

dis in the eval seg. 

satisfiedDis = false; 
satisfiedAbs = false; 



// 
for 

{ 



look for disregards in the eval seg. 

(j = 0; j < numEvalTAQs; 

if (evalSeg->taqList [j ]. taqAction 



NH_TAQ_ACTION_DISREGARD) 
not dealing with an absent 
segs are the same. 

>taqL'ist[i] . segString, 

evalSeg->taqList [ j ] . segString) ) 



// found a dis, so we are 

// situation - see if the 

satisfiedAbs = true; 
if ( ! strcmp (qSeg- 



f oundMatchingDis = true; 
satisfiedDis = true; 
break; 



been satified, we 

not find any dis in the 



} 

} 

// if we get here, and the abs has not 

// apply the abs factor, since we did 

// eval, but did find one in the query, 
if (satisfiedAbs == false) { 

applyFactor = absDisTAQFactor ; 

.// mark the DIS as satisfied so 



that we do not 

when seeing if DEL was satisfied. 

} 



// re-assign the factor below 

satisfiedDis = true; 
break; 



else { 
II 



check to see if we satisfied 



the dis. If we did, we can 



// go check out the delete stuff, 
if (satisfiedDis == true) 
break; 



// 



end for query TAQ 



TAQs while looking 



- so go on 



// 

J 

// 
// 

// 



once here, we made it to the end of the query 

for disregards. This means either: 

we found no disregards in the query 

and see if there are any 



disregards in the Eval 

// 

in Eval - we 

// 

we ' re done 

//■ 

ones in Eval - we 

// 

and we're done 

// 

Eval - so do deletes. 

// 

disregards in Eval, since 
// 

match . 



we found disregards in Q, but none 
apply the absent factor, and 

we found dis in Q, but no matching 
apply the disregard factor, 

we found a matching dis in Q and 
we can skip the check for 
we already know there is a 



// make sure we should continue 
if (satisfiedAbs && satisf iedDis ) { 
// 

if ( foundMatchingDis == false) 



{ 



no Dis Values, 



NH TAQ ACTION DISREGARD) 



// We are in this section if the Q had 

// see if there are dis values in Eval. 
for (j = 0; j < numEvalTAQs; j++) { 

if (evalSeg->taqList [ j] .taqAction == 

{ 

applyEactor = absDisTAQFactor ; 

satisfiedAbs = false; 

break; 



} 

// see if we should still continue after 

checking for reverse absent 

if (satisfiedAbs) { 

// when here, we got passed checking 

for the dis, so we need to check for 

deletes . 



looking for del segments 



// 
// 
for 



go through the query segment, 

(i = 0; i < numQTAQs ; i++) { 
if (qSeg->taqList [i] .taqAction == 



// 



since we found a del, we 



satisfiedDei = false; 
satisfiedAbs = false; 
// look for deletes in the 

for (j =0; j < numEvalTAQs; 



NH_TAQ_ACTION_DELETE) { 
must find a del in the eval seg. 

eval seg. 
i++) { 

if (evalSeg- 

>taqList[j] .taqAction == NH TAQ ACTION DELETE) { 

M J _ _ _ found a del, 

so we are not dealing with an absent 

// situation - 

see if the segs are the same. 

satisfiedAbs = 

true; 



>taqList[i] . segString, 

evalSeg->taqList [ j ] . segString} ; { 

gDel = true; 
= true; 

} 

} 



if ( 1 strcmp(qSeg- 



f oundMatchin 
satisf iedDel 
break; 



} 

// if we get here, and the 

abs has not been satified, we 

// apply the abs factor, 

since we did not- find any del in the 

// eval, but did find one 

in the query. 

if (satisf iedAbs == 

false) { 

applyFactor = 

absDelTAQFactor; 

// mark the DEL as 

satisfied so that we do not 

// re-assign the 

factor below when seeing if DEL was satisfied. 

satisfiedDel = 

true ; 

break; 

> 

else { 

// check to see if we 



satisfied the del. If we did, were done 
true) 



if (satisfiedDel — 
break; 



} 

} // end for query TAQ 

// make sure we should continue 
if (satisf iedAbs && satisfiedDel) { 
if { f oundMatchingDel == 

// We are in this section 

// see if there are del 

for (j = 0; j < numEvalTAQs; 



false) { 

if the Q had no Del Values, 
values in Eval. 
{ 

if (evalSeg- 

>taqList [j] taqAction == NH_TAQ_ACTION_DELETE) { 

applyFactor = 

absDelTAQFactor; 

satisfiedAbs = 

false; 

break; 

} 

} 

} 



} 



} 

} 

// decide the factor based on the condition that 

was not satisfied 

// except for abs, in which case we already set the 

applyFactor 

/ / above 

if (satisf iedDel =- false) 

applyFactor = delTAQFactor ; 
else if (satisf iedDis == false) 

applyFactor = disTAQFactor ; 

} 

} 

// apply the factor we decided on 
*diScore *= applyFactor; 

} 



// DigraphBitmapArray.hpp : header file
//
// Class that holds the bit patterns for each possible
// digraph (AA - ZZ).  We also need to account for spaces.
//
// Each bit pattern turns on just 1 bit.  We basically turn
// on one bit, and shift it through the value until it reaches
// the other end, at which time we start back at the beginning
// again.
//
// Any other characters are treated as spaces in our scheme,
// so we do not need to worry about them.
// NOTE(review): the key builders in the .cpp skip a digraph whose
// characters fall outside 'A'-'Z'/space rather than mapping them to
// a space - confirm that callers normalise tokens first.
//
// The class supports either a 32 bit value, or a 64 bit value.
////////////////////////////////////////////////////////////////////////

#ifndef DIGRAPHBITMAPARRAY_HPP
#define DIGRAPHBITMAPARRAY_HPP

// How many indexes do we need in our two dimensional array?
// 27 (26 letters plus a space)
#define BITMAP_ARRAY_INDEX_SIZE 27

// A 64 bit key carried as two unsigned int halves.
typedef struct {
    unsigned int hiBytes;
    unsigned int lowBytes;
} bit_64_t;

class NHDigraphBitmapArray
{
// Construction
public:
    NHDigraphBitmapArray();     // standard constructor
    ~NHDigraphBitmapArray();

    // OR together the 32 bit patterns of every digraph in the token.
    unsigned int get32BitKeyForToken(char *token);

    // Same as above, but builds the 64 bit pattern into *key.
    void get64BitKeyForToken(char *token, bit_64_t *key);

    // Look up the precomputed bit count for one byte value.
    unsigned char getNumBitsForByte(unsigned char byteVal) { return bitTable[byteVal]; }

// Implementation
protected:
    void buildBitTable();

    // the array that holds the bit map patterns for each possible
    // digraph.  Each item in the array is an integer that has
    // one of its 32 bits turned on.
    unsigned int bitMapArray32[BITMAP_ARRAY_INDEX_SIZE][BITMAP_ARRAY_INDEX_SIZE];

    // the array that holds the bit map patterns for each possible
    // digraph.  Each item in the array is an integer that has
    // one of its 64 bits turned on.
    bit_64_t bitMapArray64[BITMAP_ARRAY_INDEX_SIZE][BITMAP_ARRAY_INDEX_SIZE];

    // per-byte bit counts, filled in by buildBitTable()
    unsigned char bitTable[256];
};

#endif



// NHDigraphBitmapArray . cpp : implementation file 
// 

// 3/20/98 EFB Changed names to NH from SN 

#include "NHDigraphBitmapArray . hpp" 

# include <stdio . h> 

fifdef _DEBUG 
#define new DEBUG_NEW 
#undef THIS_FILE 

static char THIS_FILE[] = FILE ; 

#endif 



typedef unsigned char byte; 
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It III 

II Constructor. 

// Fills in the values in both of the bitMapArrays (32 bit and 
// 64 bits) . 

NHDigraphBitmapArray : : NHDigraphBitmapArray ( ) 
{ 

unsigned int bitmapValue32 = 1; 

unsigned int bitmapVal.ue64High = 0; 

unsigned int bitmapValue64Low = 1; 

for (int i = 0; i < BITMAP_ARRAY_INDEX_SI ZE; i++) { 

for (int j = 0; j < B I TMAP_ARRAY_I N DEX_S I Z E ; j++) { 

// assign the 32 ,bit value 
bitMapArray32 [i] [ j ] = bitmapValue32 ; 

// assign the 64 bit value 

bitMapArray64 [i] [j] .hiBytes = bitmapValue64 High ; 
bitMapArray64 [i] [j] .lowBytes = bitmapValue64Low; 

// now shift the values 
bitmapValue32 «= 1; 
if (bitmapValue32 0) 
bitmapValue32 = 1; 

if (bitmapValue64Low == 0) { 
bitmapValue64High «-= 1; 
if (bitmapValue64High ==0) { 
bitmapValue64Low = 1; 

} 

} 

else { 

bitmapValue64Low <<= 1; 
if (bitmapValue64Low ==0) { 
bitmapValue64High = 1; 

» 

} 

} 

} 



buildBitTable ( ) ; 

} 



// Destructor.  Nothing to release here; the body is empty.
NHDigraphBitmapArray::~NHDigraphBitmapArray()
{
}



void NHDigraphBitmapArray: : get64BitKeyForToken {char +token, bit_64_t 
*key ) 



{ 

char *chl; 

char *ch2; 

int indexl; 

int index2; 

char spacedToken [200] ; 

// zero out the key we are going to return 
key->hiBytes = 0; 
key->lowByte.s = 0; 

sprintf (spacedToken, " %s ", token); 

chl = spacedToken; 
if (*chl != '\0') { 
ch2 = chl + 1; 
while (*ch2 !=. '\0'} { 
if <*chl == ' ') 

indexl = 26; 

else 

indexl = *chl - 'A' ; 
if (*ch2 == ' ') 

index2 - 26; 

else 

index2 - *chl - ' A ' ; 
if ((indexl >= 0) && (indexl < 
B I TMAP_ARRAY_I NDEX_S I Z E ) 

&& (index2 >= 0) && (index2 < 
BITMAP_ARRAY_INDEX_SIZE) ) { 

key->hiBytes 
bitMapArray64 [indexl] [index2] .hiBytes; 

key->lowBytes |= 
bitMapArray64 [indexl] [index2] . lowBytes; 

} 

chl = ch2; 
ch2++; 

} 

} 



unsigned int NHDigraphBitmapArray :: get32BitKeyForToken (char *token) 

{ 

unsigned int retVal = 0; 

char *chl; 

char *ch2; 

j_ n t indexl; 

int index2; 

char spacedToken[200] ; 



sprintf {spacedToken, " %s ", token); 

chl = spacedToken; 
if (*chl != '\0') { 
ch2 = chl + 1; 
while (*ch2 != ' \0 1 ) { 
if (+chl == ' ' ) 

indexl = 26; 

else 

■indexl = *ehl - 1 A ' ; 
if (*ch2 == ' * ) 

index2 =26; 

else 

index2 = *chl - 1 A' ; 
if ((indexl >= 0) && (indexl < 
B I TMAP_ARRAY_I N DEX_S IZE) 

&& (index2 >= 0} && (index2 < 

BITMAP_ARRAY_INDEX_SIZE) ) 

retVal |= bitMapArray32 [indexl] [index2] ; 
chl = ch2; 
ch2++; 

} 

} 

return retVal; 



// build a table that says how many bits a byte value 
// has turned off. 

void NHDigraphBitmapArray : : buildBitTable ( ) 
{ 

byte tempByte; 
int i, j; 

byte bitsTurnedOf f ; 

for (i =0; i < 256; i++) { 
tempByte = i; 
bitsTurnedOff = 0; 
for (j = 0; j < 8; j++) { 

if (tempByte & 1) // use this 

when array says how many l's 

// if ((tempByte & 1) == 0} // use 

this when array says how many O's 

bitsTurnedOf f ++; 
tempByte >>= 1; 

} 

bitTable[i] = bitsTurnedOff; 

} 

} 



//
// File: NHCompParms.cpp
// Description:
//
//     Implementation to the NHCompParms class.
//
// History:
//
//     5/8/97     EFB     Created
//     3/3/98     EFB     Changed name of class, and moved parms to
//                        the new NHNameParms class.
//     3/20/98    EFB     Changed names to NH from SN
//



#include <string.h>- 
Hnclude <stdio.h> 
#include <stdlib.h> 



tinclude "NHCompParms . hpp" 

#include "NHVariantTable . hpp" 

# include "NHTAQTable . hpp" 

#include "NH_variant_taq_globals . h" 



// Constructs a parameter set preloaded with the values for one
// culture/parameter type.
//
// Every member is first given the NH_PARMS_GENERIC value, then the
// switch below overrides only what differs for the requested type.
// The resulting values are the ones in the original per-case listing,
// with two deliberate differences:
//   - NH_PARMS_RUSSIAN never assigned scoreGnTaqs / scoreSnTaqs, leaving
//     them uninitialised; they now keep the baseline value (true), which
//     is what every other type uses.
//   - an unrecognised parmsType used to leave every member
//     uninitialised; it now gets the generic values.
//
// status is always set to NH_SUCCESS.
NHCompParms::NHCompParms(NHParmsType parmsType)
{
    status = NH_SUCCESS;

    // ---- baseline: the NH_PARMS_GENERIC values ----
    scoreThresh                  = 0.6;
    useGnLeftBias                = false;
    useSnLeftBias                = false;
    matchGnIntial                = true;
    matchSnIntial                = false;
    gnInitialScore               = 0.85;
    snInitialScore               = 0.0;
    gnInitialOnInitialMatchScore = 1.0;
    snInitialOnInitialMatchScore = 0.0;
    useGnVariants                = true;
    useSnVariants                = true;
    fnuScore                     = 0.60;
    nfnScore                     = 0.65;
    lnuScore                     = 0.6;
    nlnScore                     = 0.65;
    gnAnchorSegmentMode          = NH_ANCHOR_SEG_NONE;
    snAnchorSegmentMode          = NH_ANCHOR_SEG_NONE;
    gnAnchorFactor               = 0.0;
    snAnchorFactor               = 0.0;
    gnOOPSFactor                 = 0.6;
    snOOPSFactor                 = 0.6;
    disGnTAQFactor               = 0.7;
    absDelGnTAQFactor            = 0.9;
    absDisGnTAQFactor            = 0.8;
    delGnTAQFactor               = 0.85;
    disSnTAQFactor               = 0.7;
    absDelSnTAQFactor            = 0.9;
    absDisSnTAQFactor            = 0.8;
    delSnTAQFactor               = 0.85;
    checkGnCompressedName        = false;
    checkSnCompressedName        = false;
    gnCompressedNameScore        = 0.0;
    snCompressedNameScore        = 0.0;
    scoreGnTaqs                  = true;
    scoreSnTaqs                  = true;
    gnSegmentScoreMode           = NH_SEGMODE_AVG;
    snSegmentScoreMode           = NH_SEGMODE_AVG;
    gnScoreThresh                = 0.5;
    snScoreThresh                = 0.5;
    gnWeight                     = 0.8;
    snWeight                     = 1.0;

    // ---- per-type overrides ----
    switch (parmsType)
    {
        case NH_PARMS_GENERIC:
        case NH_PARMS_ANGLO:
            // the Anglo listing is value-for-value the same as generic
            break;

        case NH_PARMS_ARABIC:
            scoreThresh                  = 0.63;
            matchSnIntial                = true;
            snInitialScore               = 0.85;
            snInitialOnInitialMatchScore = 1.0;
            useGnVariants                = false;
            useSnVariants                = false;
            gnOOPSFactor                 = 0.7;
            snOOPSFactor                 = 0.9;
            checkGnCompressedName        = true;
            checkSnCompressedName        = true;
            gnCompressedNameScore        = 0.9;
            snCompressedNameScore        = 0.9;
            gnScoreThresh                = 0.63;
            snScoreThresh                = 0.63;
            gnWeight                     = 1.0;
            snWeight                     = 0.8;
            break;

        case NH_PARMS_CHINESE:
            scoreThresh                  = 0.70;
            matchGnIntial                = false;
            gnInitialScore               = 0.0;
            gnInitialOnInitialMatchScore = 0.0;
            gnOOPSFactor                 = 0.0;
            snOOPSFactor                 = 1.0;
            // NOTE(review): the scanned listing reads 0.3 here while every
            // other type uses 0.9 - confirm this is not a misread 0.9.
            absDelGnTAQFactor            = 0.3;
            gnSegmentScoreMode           = NH_SEGMODE_LOWEST;
            gnScoreThresh                = 0.7;
            snScoreThresh                = 0.7;
            break;

        case NH_PARMS_HISPANIC:
            scoreThresh                  = 0.60;
            matchSnIntial                = true;
            snInitialScore               = 0.85;
            snInitialOnInitialMatchScore = 1.0;
            snAnchorSegmentMode          = NH_ANCHOR_SEG_FIRST;
            snAnchorFactor               = 0.70;
            checkGnCompressedName        = true;
            checkSnCompressedName        = true;
            gnCompressedNameScore        = 0.9;
            snCompressedNameScore        = 0.9;
            gnScoreThresh                = 0.6;
            snScoreThresh                = 0.6;
            break;

        case NH_PARMS_KOREAN:       // Parameters tuned for Korean names.
            scoreThresh                  = 0.66;
            matchGnIntial                = false;
            gnInitialScore               = 0.0;
            gnInitialOnInitialMatchScore = 0.0;
            gnOOPSFactor                 = 0.69;
            snOOPSFactor                 = 0.63;
            gnScoreThresh                = 0.69;
            snScoreThresh                = 0.63;
            break;

        case NH_PARMS_RUSSIAN:      // Parameters tuned for Russian names.
            scoreThresh                  = 0.61;
            useSnLeftBias                = true;
            matchSnIntial                = true;
            snInitialScore               = 0.85;
            snInitialOnInitialMatchScore = 1.0;
            useGnVariants                = false;
            useSnVariants                = false;
            gnAnchorSegmentMode          = NH_ANCHOR_SEG_FIRST;
            gnAnchorFactor               = 0.60;
            gnOOPSFactor                 = 0.65;
            snOOPSFactor                 = 0.8;
            gnSegmentScoreMode           = NH_SEGMODE_HIGHEST;
            gnScoreThresh                = 0.6;
            snScoreThresh                = 0.62;
            break;

        default:
            // unknown type: keep the generic baseline
            break;
    } // end of switch
}



// Constructs a parameter set from a stream previously produced by
// archiveData().
//
// The data is the raw in-memory bytes of each member, so the order and
// sizes of the reads below must stay in step with the writes in
// archiveData().  The leading version number is read but not otherwise
// interpreted.
//
// status is set to NH_SUCCESS when every read succeeded, and to
// NH_COMP_PARMS_BAD_STREAM_ON_CONSTRUCT when the stream was already bad
// on entry or (new) when it failed part-way through; in that last case
// the members after the failure point do not hold valid data.
NHCompParms::NHCompParms(istream &inStream)
{
    int compParmsVersion;

    if (inStream.good()) {
        inStream.read((char *) &compParmsVersion, sizeof(int));

        inStream.read((char *) &scoreThresh, sizeof(double));
        inStream.read((char *) &useGnLeftBias, sizeof(bool));
        inStream.read((char *) &useSnLeftBias, sizeof(bool));
        inStream.read((char *) &matchGnIntial, sizeof(bool));
        inStream.read((char *) &matchSnIntial, sizeof(bool));
        inStream.read((char *) &gnInitialScore, sizeof(double));
        inStream.read((char *) &snInitialScore, sizeof(double));
        inStream.read((char *) &useGnVariants, sizeof(bool));
        inStream.read((char *) &useSnVariants, sizeof(bool));
        inStream.read((char *) &fnuScore, sizeof(double));
        inStream.read((char *) &nfnScore, sizeof(double));
        inStream.read((char *) &lnuScore, sizeof(double));
        inStream.read((char *) &nlnScore, sizeof(double));

        inStream.read((char *) &gnSegmentScoreMode, sizeof(NHSegScoreMode));
        inStream.read((char *) &snSegmentScoreMode, sizeof(NHSegScoreMode));
        inStream.read((char *) &gnAnchorSegmentMode, sizeof(NHAnchorSegMode));
        inStream.read((char *) &snAnchorSegmentMode, sizeof(NHAnchorSegMode));

        inStream.read((char *) &gnAnchorFactor, sizeof(double));
        inStream.read((char *) &snAnchorFactor, sizeof(double));
        inStream.read((char *) &gnOOPSFactor, sizeof(double));
        inStream.read((char *) &snOOPSFactor, sizeof(double));

        inStream.read((char *) &scoreGnTaqs, sizeof(bool));
        inStream.read((char *) &scoreSnTaqs, sizeof(bool));

        inStream.read((char *) &absDelGnTAQFactor, sizeof(double));
        inStream.read((char *) &absDisGnTAQFactor, sizeof(double));
        inStream.read((char *) &absDelSnTAQFactor, sizeof(double));
        inStream.read((char *) &absDisSnTAQFactor, sizeof(double));
        inStream.read((char *) &delGnTAQFactor, sizeof(double));
        inStream.read((char *) &delSnTAQFactor, sizeof(double));
        inStream.read((char *) &disGnTAQFactor, sizeof(double));
        inStream.read((char *) &disSnTAQFactor, sizeof(double));

        inStream.read((char *) &checkGnCompressedName, sizeof(bool));
        inStream.read((char *) &checkSnCompressedName, sizeof(bool));
        inStream.read((char *) &gnCompressedNameScore, sizeof(double));
        inStream.read((char *) &snCompressedNameScore, sizeof(double));

        inStream.read((char *) &gnScoreThresh, sizeof(double));
        inStream.read((char *) &snScoreThresh, sizeof(double));

        inStream.read((char *) &gnWeight, sizeof(double));
        inStream.read((char *) &snWeight, sizeof(double));

        inStream.read((char *) &gnInitialOnInitialMatchScore, sizeof(double));
        inStream.read((char *) &snInitialOnInitialMatchScore, sizeof(double));

        // a short or failed read leaves the fail bit set; do not report
        // success for a half-filled object
        if (inStream.fail())
            status = NH_COMP_PARMS_BAD_STREAM_ON_CONSTRUCT;
        else
            status = NH_SUCCESS;
    }
    else
        status = NH_COMP_PARMS_BAD_STREAM_ON_CONSTRUCT;
}



// Destructor.  Nothing to release here; the body is empty.
NHCompParms::~NHCompParms()
{
}



// Writes this parameter set to a stream as raw in-memory bytes, led by
// a version number.  The order and sizes of the writes must stay in
// step with the reads in the istream constructor.
//
// Returns NH_SUCCESS when everything was written, or
// NH_COMP_PARMS_BAD_STREAM_ON_ARCHIVE when the stream was already bad on
// entry or (new) when it failed while writing.
NHReturnCode NHCompParms::archiveData(ostream &outStream)
{
    // comp parms file version history
    // 1.0 - first version
    int compParmsVersion = 1;

    NHReturnCode rc = NH_SUCCESS;

    if (outStream.good()) {
        outStream.write((char *) &compParmsVersion, sizeof(int));

        outStream.write((char *) &scoreThresh, sizeof(double));
        outStream.write((char *) &useGnLeftBias, sizeof(bool));
        outStream.write((char *) &useSnLeftBias, sizeof(bool));
        outStream.write((char *) &matchGnIntial, sizeof(bool));
        outStream.write((char *) &matchSnIntial, sizeof(bool));
        outStream.write((char *) &gnInitialScore, sizeof(double));
        outStream.write((char *) &snInitialScore, sizeof(double));
        outStream.write((char *) &useGnVariants, sizeof(bool));
        outStream.write((char *) &useSnVariants, sizeof(bool));
        outStream.write((char *) &fnuScore, sizeof(double));
        outStream.write((char *) &nfnScore, sizeof(double));
        outStream.write((char *) &lnuScore, sizeof(double));
        outStream.write((char *) &nlnScore, sizeof(double));

        outStream.write((char *) &gnSegmentScoreMode, sizeof(NHSegScoreMode));
        outStream.write((char *) &snSegmentScoreMode, sizeof(NHSegScoreMode));
        outStream.write((char *) &gnAnchorSegmentMode, sizeof(NHAnchorSegMode));
        outStream.write((char *) &snAnchorSegmentMode, sizeof(NHAnchorSegMode));

        outStream.write((char *) &gnAnchorFactor, sizeof(double));
        outStream.write((char *) &snAnchorFactor, sizeof(double));
        outStream.write((char *) &gnOOPSFactor, sizeof(double));
        outStream.write((char *) &snOOPSFactor, sizeof(double));

        outStream.write((char *) &scoreGnTaqs, sizeof(bool));
        outStream.write((char *) &scoreSnTaqs, sizeof(bool));

        outStream.write((char *) &absDelGnTAQFactor, sizeof(double));
        outStream.write((char *) &absDisGnTAQFactor, sizeof(double));
        outStream.write((char *) &absDelSnTAQFactor, sizeof(double));
        outStream.write((char *) &absDisSnTAQFactor, sizeof(double));
        outStream.write((char *) &delGnTAQFactor, sizeof(double));
        outStream.write((char *) &delSnTAQFactor, sizeof(double));
        outStream.write((char *) &disGnTAQFactor, sizeof(double));
        outStream.write((char *) &disSnTAQFactor, sizeof(double));

        outStream.write((char *) &checkGnCompressedName, sizeof(bool));
        outStream.write((char *) &checkSnCompressedName, sizeof(bool));
        outStream.write((char *) &gnCompressedNameScore, sizeof(double));
        outStream.write((char *) &snCompressedNameScore, sizeof(double));

        outStream.write((char *) &gnScoreThresh, sizeof(double));
        outStream.write((char *) &snScoreThresh, sizeof(double));

        outStream.write((char *) &gnWeight, sizeof(double));
        outStream.write((char *) &snWeight, sizeof(double));

        outStream.write((char *) &gnInitialOnInitialMatchScore, sizeof(double));
        outStream.write((char *) &snInitialOnInitialMatchScore, sizeof(double));

        // report a write that failed part-way instead of claiming success
        if (outStream.fail())
            rc = NH_COMP_PARMS_BAD_STREAM_ON_ARCHIVE;
    }
    else
        rc = NH_COMP_PARMS_BAD_STREAM_ON_ARCHIVE;

    return rc;
}



// Sets scoreThresh.
//
// aThresh must lie in [0.0, 1.0].  Returns NH_SUCCESS when the value
// was stored, or NH_INVALID_SCORE_THRESH (leaving scoreThresh untouched)
// when it was out of range.  The test is written as a negated
// "in range" check so that NaN is rejected as well.
NHReturnCode NHCompParms::setScoreThresh(double aThresh)
{
    if (!((aThresh >= 0.0) && (aThresh <= 1.0)))
        return NH_INVALID_SCORE_THRESH;

    scoreThresh = aThresh;
    return NH_SUCCESS;
}

void NHCompParms :: setUseGnLef tBias (bool aBool) 
useGnLef tBias = aBool; 



void NHCompParms :: setUseSnLef tBias (bool aBool) 
useSnLeftBias = aBool; 



void NHCompParms :: setMatchGnlntial (bool aBool) 
matchGnlntial = aBool; 



void NHCompParms : : setMatchSnlntial (bool aBool ) 
matchSnlntial = aBool; 



// Stores aScore in gnInitialScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_INIT_SCORE and leaves gnInitialScore unchanged. The range test
// is written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setGnInitialScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_GN_INIT_SCORE;

    gnInitialScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in snInitialScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_INIT_SCORE and leaves snInitialScore unchanged. The range test
// is written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setSnInitialScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_NH_INIT_SCORE;

    snInitialScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in gnInitialOnInitialMatchScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_INIT_ON_INIT_MATCH_SCORE and leaves the member unchanged. The
// range test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setGnInitialOnInitialMatchScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_GN_INIT_ON_INIT_MATCH_SCORE;

    gnInitialOnInitialMatchScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in snInitialOnInitialMatchScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_INIT_ON_INIT_MATCH_SCORE and leaves the member unchanged. The
// range test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setSnInitialOnInitialMatchScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_NH_INIT_ON_INIT_MATCH_SCORE;

    snInitialOnInitialMatchScore = aScore;
    return NH_SUCCESS;
}



void 
{ 



NHCompParms: : setUseGnVariants (bool aBool) 
useGnVariants = aBool; 



void NHCompParms : : setUseSnVariants (bool aBool) 
{ 

useSnVariants = aBool; 

} 

// Stores aScore in nfnScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NFN_SCORE and leaves nfnScore unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setNFNScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_NFN_SCORE;

    nfnScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in fnuScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_FNU_SCORE and leaves fnuScore unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setFNUScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_FNU_SCORE;

    fnuScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in nlnScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NLN_SCORE and leaves nlnScore unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setNLNScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_NLN_SCORE;

    nlnScore = aScore;
    return NH_SUCCESS;
}

// Stores aScore in lnuScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_LNU_SCORE and leaves lnuScore unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setLNUScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_LNU_SCORE;

    lnuScore = aScore;
    return NH_SUCCESS;
}



// Stores aThresh in gnScoreThresh.
// Returns NH_SUCCESS when aThresh lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_THRESH and leaves gnScoreThresh unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setGnScoreThresh(double aThresh)
{
    if (!((aThresh >= 0.0) && (aThresh <= 1.0)))
        return NH_INVALID_GN_THRESH;

    gnScoreThresh = aThresh;
    return NH_SUCCESS;
}

// Stores aThresh in snScoreThresh.
// Returns NH_SUCCESS when aThresh lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_THRESH and leaves snScoreThresh unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setSnScoreThresh(double aThresh)
{
    if (!((aThresh >= 0.0) && (aThresh <= 1.0)))
        return NH_INVALID_NH_THRESH;

    snScoreThresh = aThresh;
    return NH_SUCCESS;
}

// Stores aWeight in gnWeight.
// Returns NH_SUCCESS when aWeight lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_WEIGHT and leaves gnWeight unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setGnWeight(double aWeight)
{
    if (!((aWeight >= 0.0) && (aWeight <= 1.0)))
        return NH_INVALID_GN_WEIGHT;

    gnWeight = aWeight;
    return NH_SUCCESS;
}



// Stores aWeight in snWeight.
// Returns NH_SUCCESS when aWeight lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_WEIGHT and leaves snWeight unchanged. The range test is
// written positively so that NaN (which fails every comparison) is rejected.
NHReturnCode NHCompParms::setSnWeight(double aWeight)
{
    if (!((aWeight >= 0.0) && (aWeight <= 1.0)))
        return NH_INVALID_NH_WEIGHT;

    snWeight = aWeight;
    return NH_SUCCESS;
}



// Stores aMode in gnSegmentScoreMode.
void NHCompParms::setGnSegmentScoreMode(NHSegScoreMode aMode)
{
    gnSegmentScoreMode = aMode;
}



void NHCompParms: : setSnSegmentScoreMode (NHSegScoreMode aMode) 
snSegmentScoreMode = aMode; 



void NHCompParms: : setGnAnchorSegmentMode (NHAnchorSegMode anAnchorMode ) 
gnAnchorSegmentMode = anAnchorMode; 



void NHCompParms: : setSnAnchorSegmentMode (NHAnchorSegMode anAnchorMode) 
snAnchorSegmentMode = anAnchorMode; 



// Stores aFactor in gnAnchorFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_ANCHOR_FACTOR and leaves gnAnchorFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setGnAnchorFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_GN_ANCHOR_FACTOR;

    gnAnchorFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in snAnchorFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_ANCHOR_FACTOR and leaves snAnchorFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setSnAnchorFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_NH_ANCHOR_FACTOR;

    snAnchorFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in gnOOPSFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_OOPS_FACTOR and leaves gnOOPSFactor unchanged. The range test
// is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setGnOOPSFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_GN_OOPS_FACTOR;

    gnOOPSFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in snOOPSFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_NH_OOPS_FACTOR and leaves snOOPSFactor unchanged. The range test
// is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setSnOOPSFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_NH_OOPS_FACTOR;

    snOOPSFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in absDelGnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_ABS_DEL_GN_TAQ_FACTOR and leaves the member unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setAbsDelGnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_ABS_DEL_GN_TAQ_FACTOR;

    absDelGnTAQFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in absDisGnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_ABS_DIS_GN_TAQ_FACTOR and leaves the member unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setAbsDisGnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_ABS_DIS_GN_TAQ_FACTOR;

    absDisGnTAQFactor = aFactor;
    return NH_SUCCESS;
}

// Stores aFactor in absDelSnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_ABS_DEL_NH_TAQ_FACTOR and leaves the member unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setAbsDelSnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_ABS_DEL_NH_TAQ_FACTOR;

    absDelSnTAQFactor = aFactor;
    return NH_SUCCESS;
}



// Stores aFactor in absDisSnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_ABS_DIS_NH_TAQ_FACTOR and leaves the member unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setAbsDisSnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_ABS_DIS_NH_TAQ_FACTOR;

    absDisSnTAQFactor = aFactor;
    return NH_SUCCESS;
}



// Stores aFactor in delGnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_DEL_GN_TAQ_FACTOR and leaves delGnTAQFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setDelGnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_DEL_GN_TAQ_FACTOR;

    delGnTAQFactor = aFactor;
    return NH_SUCCESS;
}



// Stores aFactor in delSnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_DEL_NH_TAQ_FACTOR and leaves delSnTAQFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setDelSnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_DEL_NH_TAQ_FACTOR;

    delSnTAQFactor = aFactor;
    return NH_SUCCESS;
}



// Stores aFactor in disGnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_DIS_GN_TAQ_FACTOR and leaves disGnTAQFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setDisGnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_DIS_GN_TAQ_FACTOR;

    disGnTAQFactor = aFactor;
    return NH_SUCCESS;
}



// Stores aFactor in disSnTAQFactor.
// Returns NH_SUCCESS when aFactor lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_DIS_NH_TAQ_FACTOR and leaves disSnTAQFactor unchanged. The range
// test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setDisSnTAQFactor(double aFactor)
{
    if (!((aFactor >= 0.0) && (aFactor <= 1.0)))
        return NH_INVALID_DIS_NH_TAQ_FACTOR;

    disSnTAQFactor = aFactor;
    return NH_SUCCESS;
}



void NHCompParms :: setScoreGnTAQs (bool aBool) 
scoreGnTaqs = aBool; 



void NHCompParms :: setScoreSnTAQs (bool aBool) 
scoreSnTaqs = aBool; 



void NHCompParms : : setCheckGnCompressedName (bool aBool ) 

checkGnCompressedName = aBool; 



void NHCompParms: : setCheckSnCompressedName (bool aBool) 

checkSnCompressedName = aBool; 



// Stores aScore in gnCompressedNameScore.
// Returns NH_SUCCESS when aScore lies in [0.0, 1.0]; otherwise returns
// NH_INVALID_GN_COMPRESSED_NAME_SCORE and leaves the member unchanged. The
// range test is written positively so that NaN is rejected as well.
NHReturnCode NHCompParms::setGnCompressedNameScore(double aScore)
{
    if (!((aScore >= 0.0) && (aScore <= 1.0)))
        return NH_INVALID_GN_COMPRESSED_NAME_SCORE;

    gnCompressedNameScore = aScore;
    return NH_SUCCESS;
}

NHReturnCocie NHCompParms : : setSnCcvopressedNameScore (double 

aScore) 

{ 

NHReturnCode errorCode ; 

if ( (aScore < 0.0) M (aScore > 1.0)) 

errorCode = NH_INVALID_NH_COMPRESSED_NAME_SCORE ; 

else { 

snCompressedNameScore = aScore; 
errorCode - NH_SUCCESS; 

} 

return errorCode; 

} 



bool 
{ 



&& 
&& 

other . 
other . 



NHCompParms: : operator== (NHCompParms &other) 
bool rc; 

rc = ( (scoreThresh — other . scoreThresh) && 

(useGnLeftBias == other . useGnLef tBias ) && 
(useSnLeftBias == other . useSnLef tBias ) && 
(matchGnlntial other .matchGnlntial) && 
(matchSnlntial == other .matchSnlntial ) && 
(gnlnitialScore == other . gnlnitialScore ) && 
(snlnitialScore other . snlnitialScore ) && 
(useGnVariants == other . useGnVariants) && 
(useSnVariants == other . useSnVariants ) && 
(fnuScore == other . fnuScore ) && 
(nfnScore == other . nfnScore ) && 
(InuScore == other . InuScore ) && 
(nlnScore == other . nlnScore ) && 

(gnSegmentScoreMode == other . gnSegmentScoreMode) 

(snSegmentScoreMode » other . snSegmentScoreMode) 

(gnAnchorSegmentMode == 
gnAnchor Segment Mode) && 

(snAnchorSegmentMode == 
snAnchorSegmentMode) && 

(gnAnchorFactor other . gnAnchorFactor) && 
(snAnchorFactor == other . snAnchorFactor ) && 
(gnOOPSFactor == other . gnOOPSFactor) && 
(snOOPSFactor == other . snOOPSFactor) && 
(gnWeight — other . gnWeight ) && 
(snWeight == other . snWeight) && 
(gnScoreThresh == other . gnScoreThresh) && 
(snScoreThresh == other . snScoreThresh) && 
(scoreGnTaqs == other . scoreGnTaqs ) && 
(scoreSnTaqs == other . scoreSnTaqs ) && 
(absDelGnTAQFactor == other . absDelGnTAQFactor ) 

(absDisGnTAQFactor ~ other . absDisGnTAQFactor ) 

(absDelSnTAQFactor == other . absDelSnTAQFactor ) 



(absDisSnTAQFactor -= other . absDisSnTAQFactor) 

&& 

(delGnTAQFactor == other . delGnTAQFactor) && 

(delSnTAQFactor == other . delSnTAQFactor ) && 

(disGnTAQFactor ~ other . disGnTAQFactor) && 

(disSnTAQFactor == other . disSnTAQFactor ) && 

(checkGnCompressedName == 
other . checkGnCompressedName) && 

(checkSnCompressedName == 
other . checkSnCompressedName) && 

(gnCompressedNameScore == 
other . gnCompressedNameScore) && 

( snCompressedNameScore == 
other . snCompressedNameScore) && 

(gnlnitialOnlnitialMatchScore == 
other . gnlnitialOnlnitialMatchScore) && 

(snlnitialOnlnitialMatchScore == 
other . snlnitialOnlnitialMatchScore) ) ; 
return rc; 

} 



// Returns the current value of the status member.
NHReturnCode NHCompParms::getStatus()
{
    return status;
}



//
// File: NH_variant_taq_globals.h
//
// Description:
//
//    Functions to manage the global variant and TAQ resources.
//    We manage the TAQ and variant tables as global resources
//    so that each NHCompParms object does not need to create its
//    own copy of them. We provide these global functions so that
//    we can control the variables in one location.
//
// History:
//
//    9/08/97   EFB   Created
//    3/20/98   EFB   Changed names to NH from SN
//



#ifndef NH_VARIANT_TAQ_GLOBALS_DEFFED 
#define NH VARIANT TAQ_GLOBALS_DEFFED 



#include 



"NH culture codes. h" 



// function to return pointers to the global SN and GN Variant Tables 
NHVariantTable *NH__getVariantTable (NH_VARIANT_TABLE_TYPES 
variantTableType) ; 



NHTAQTable *NH_getTAQTable ( ) ; 



#endif 



// File: NH_variant_taq_globals.cpp
//
// Description:
//
//    Functions to manage the global variant and TAQ resources.
//    We manage the TAQ and variant tables as global resources
//    so that each NHCompParms object does not need to create its
//    own copy of them. We provide these global functions so that
//    we can control the variables in one location.
//
//    We should provide some sort of thread protection around these
//    resources to make sure that two competing threads do not attempt
//    to grab these resources during creation time. How can we do this
//    portably?
//
// History:
//
//    9/08/97   EFB   Created
//    3/20/98   EFB   Changed names to NH from SN
//



#include 



<string . h> 



♦include "NH_util . hpp n 

♦include "NHVariantTable . hpp" 

♦include "NHTAQTable . hpp" 

♦include "NH variant taq_globals . h" 



// define SN and GN variant tables; both start out NULL and are filled in
// by NH_getVariantTable on first request
NHVariantTable *NH_snVariantTable = NULL;
NHVariantTable *NH_gnVariantTable = NULL;

// define a single TAQ table; filled in by NH_getTAQTable on first request
NHTAQTable *NH_taqTable = NULL;



// functions to create and return pointers to the tables 

// Returns the variant table for variantTableType, creating it if needed.
//
// For NH_SURNAME_VARIANTS and NH_GIVENNAME_VARIANTS the table is kept in the
// matching global pointer: the first call allocates it, later calls return
// the same object.
//
// NOTE(review): for any other type value no global slot exists, so a new
// table is allocated on every call and is not recorded here - confirm whether
// callers are expected to free it.
// NOTE(review): creation is not protected against concurrent callers (see the
// file header comment).
NHVariantTable *NH_getVariantTable(NH_VARIANT_TABLE_TYPES variantTableType)
{
    NHVariantTable *tablePtr;
    NHVariantTable **tablePtrPtr = NULL;

    switch (variantTableType) {
    case NH_SURNAME_VARIANTS:
        tablePtr = NH_snVariantTable;
        tablePtrPtr = &NH_snVariantTable;
        break;

    case NH_GIVENNAME_VARIANTS:
        tablePtr = NH_gnVariantTable;
        tablePtrPtr = &NH_gnVariantTable;
        break;

    default:
        tablePtr = NULL;
    }

    if (tablePtr == NULL) {
        // create the table,
        tablePtr = new NHVariantTable(variantTableType);

        // and assign the global variable when there is one for this type
        if (tablePtrPtr != NULL)
            *tablePtrPtr = tablePtr;
    }

    return tablePtr;
}



// Returns the global TAQ table. The first call allocates it with
// NH_PRODUCTION_TAQ_TABLE and stores it in NH_taqTable; later calls return
// the same pointer.
// NOTE(review): creation is not protected against concurrent callers (see the
// file header comment).
NHTAQTable *NH_getTAQTable()
{
    if (NH_taqTable == NULL) {
        // create the table
        NH_taqTable = new NHTAQTable(NH_PRODUCTION_TAQ_TABLE);
    }

    return NH_taqTable;
}



// File: NH_util.cpp
//
// Description:
//
//    Implementation of various utility functions used in the SNAPI
//
// History:
//
//    5/15/97   EFB   Created
//    3/20/98   EFB   Changed names to NH from SN
//

#include <string . h> 



#include "NH_util . hpp" 
#include "NHCompParms . hpp" 



// function to remove leading and trailing spaces from a string 
// in place. 

// Strips the string at either end or both ends. 

// Stripchars specify the characters that should 

// be stripped. We start by seeing if they want the 

// trailing chars stripped, which is easy. We simply 

// work backwards from the end of the string, looking for 

// the first non-strippable character, and terminate the. 

// string just past that character. Then if they wanted 

// leading chars stripped, we work forwards to the first 

// non-strippable char, and then move that and each following 

// char to the beginning of the string. 

void NH_strip(char *aString) 

{ 

char *end_point; 
char *ch; 
int len; 

if ((len = strlen(aString) ) !- 0) { // if there is a string 
// start at end 
end_point = aString + len - 1; 

// and work back till we get a non-space or get to 

// the begining of our string, chopping off what's left. 

// Also make sure we don't zoom right past the beginning of 

the 

// string. 

for (; strchr (NH_DEFAULT_WHITESPACE, *end_point) != NULL && 
end_point != aString; endjpoint — ) 

II if string was all whitespace 

if ((end_point aString) && strchr (NH_DEFAULT_WHITESPACE, 
*aString) != NULL) 

*aString = EOS; // erase it all, and we're done, 

could return here 
else 

+ (end_point + 1) = EOS; // just chop off excess 



blanks 

// make sure there is still a string, since it might 
// have been stripped entirely above, 
if (*aString) { 

// now find first non space. we know string has at 

least one 

// nonwhite space, so we don't have to check for 

NULL . 

for (ch = aString; strchr (NH_DEFAULT_WHITESPACE, *ch) 

!- NULL; ch++) 

if (ch != aString) ( * // if there were leading spaces, 
move the block back 

char *target = aString; 
while (*ch != EOS) { 

*target = *ch; 

target++; 

ch++; 

} 

// and get the null char also 
^target = *ch; 
} // end if (are there leading spaces?) 
} // end if (and text left?) 
} // end (is there a string at all ?) 

} 

// Searches backwards for searchChar, starting at searchPos and going no
// further back than stringStart.
//
// Returns a pointer to the first match found (the one closest to searchPos,
// searchPos itself included), or NULL when the character at stringStart has
// been checked without a match. NULL is also returned when either pointer is
// NULL or searchPos lies before stringStart.
char *NH_strrchr(char *stringStart, char *searchPos, char searchChar)
{
    if (stringStart == NULL || searchPos == NULL || searchPos < stringStart)
        return NULL;

    while (*searchPos != searchChar) {
        if (searchPos == stringStart)
            return NULL;                // character not found
        searchPos--;
    }

    return searchPos;
}



// 

// File: NH_queens_arrays . hpp 
// 

// Description: 
// 

// Contains global definitions and declarations for the valid 

// combinations of indexes for the best score calculation 

// 
// 

// History: 
// 

// 6/4/97 EFB Created 

// 3/20/98 EFB Changed names to NH from SN 

// 

// Small unsigned type used for the index tables in this file.
typedef unsigned char byte;

// Two rows of two entries: the two orderings of the indexes 0 and 1.
byte twoByTwo[] = {
    1, 0,
    0, 1
};

// Six rows of two entries: every ordered pair of distinct indexes
// drawn from 0..2.
byte twoByThree[] = {
    1, 2,
    1, 0,
    2, 1,
    2, 0,
    0, 1,
    0, 2
};

// Twelve rows of two entries: every ordered pair of distinct indexes
// drawn from 0..3.
byte twoByFour[] = {
    1, 2,
    1, 3,
    1, 0,
    2, 1,
    2, 3,
    2, 0,
    3, 1,
    3, 2,
    3, 0,
    0, 1,
    0, 2,
    0, 3
};

// Twenty rows of two entries: every ordered pair of distinct indexes
// drawn from 0..4.
byte twoByFive[] = { 1, 2,
                     1, 3,
                     1, 4,
                     1, 0,
                     2, 1,
                     2, 3,
                     2, 4,
                     2, 0,
                     3, 1,
                     3, 2,
                     3, 4,
                     3, 0,
                     4, 1,
                     4, 2,
                     4, 3,
                     4, 0,
                     0, 1,
                     0, 2,
                     0, 3,
                     0, 4};

// Six rows of three entries: every ordering of the indexes 0, 1 and 2.
byte threeByThree[] = {
    1, 2, 0,
    1, 0, 2,
    2, 1, 0,
    2, 0, 1,
    0, 1, 2,
    0, 2, 1
};

// Twenty-four rows of three entries: every ordered triple of distinct
// indexes drawn from 0..3.
byte threeByFour[] = { 1, 2, 3,
                       1, 2, 0,
                       1, 3, 2,
                       1, 3, 0,
                       1, 0, 2,
                       1, 0, 3,
                       2, 1, 3,
                       2, 1, 0,
                       2, 3, 1,
                       2, 3, 0,
                       2, 0, 1,
                       2, 0, 3,
                       3, 1, 2,
                       3, 1, 0,
                       3, 2, 1,
                       3, 2, 0,
                       3, 0, 1,
                       3, 0, 2,
                       0, 1, 2,
                       0, 1, 3,
                       0, 2, 1,
                       0, 2, 3,
                       0, 3, 1,
                       0, 3, 2};

// Sixty rows of three entries: every ordered triple of distinct indexes
// drawn from 0..4.
byte threeByFive[] = { 1, 2, 3,
                       1, 2, 4,
                       1, 2, 0,
                       1, 3, 2,
                       1, 3, 4,
                       1, 3, 0,
                       1, 4, 2,
                       1, 4, 3,
                       1, 4, 0,
                       1, 0, 2,
                       1, 0, 3,
                       1, 0, 4,
                       2, 1, 3,
                       2, 1, 4,
                       2, 1, 0,
                       2, 3, 1,
                       2, 3, 4,
                       2, 3, 0,
                       2, 4, 1,
                       2, 4, 3,
                       2, 4, 0,
                       2, 0, 1,
                       2, 0, 3,
                       2, 0, 4,
                       3, 1, 2,
                       3, 1, 4,
                       3, 1, 0,
                       3, 2, 1,
                       3, 2, 4,
                       3, 2, 0,
                       3, 4, 1,
                       3, 4, 2,
                       3, 4, 0,
                       3, 0, 1,
                       3, 0, 2,
                       3, 0, 4,
                       4, 1, 2,
                       4, 1, 3,
                       4, 1, 0,
                       4, 2, 1,
                       4, 2, 3,
                       4, 2, 0,
                       4, 3, 1,
                       4, 3, 2,
                       4, 3, 0,
                       4, 0, 1,
                       4, 0, 2,
                       4, 0, 3,
                       0, 1, 2,
                       0, 1, 3,
                       0, 1, 4,
                       0, 2, 1,
                       0, 2, 3,
                       0, 2, 4,
                       0, 3, 1,
                       0, 3, 2,
                       0, 3, 4,
                       0, 4, 1,
                       0, 4, 2,
                       0, 4, 3};

// Twenty-four rows of four entries: every ordering of the indexes
// 0, 1, 2 and 3.
byte fourByFour[] = { 1, 2, 3, 0,
                      1, 2, 0, 3,
                      1, 3, 0, 2,
                      1, 3, 2, 0,
                      1, 0, 2, 3,
                      1, 0, 3, 2,
                      2, 1, 3, 0,
                      2, 1, 0, 3,
                      2, 3, 1, 0,
                      2, 3, 0, 1,
                      2, 0, 1, 3,
                      2, 0, 3, 1,
                      3, 1, 2, 0,
                      3, 1, 0, 2,
                      3, 2, 1, 0,
                      3, 2, 0, 1,
                      3, 0, 1, 2,
                      3, 0, 2, 1,
                      0, 1, 2, 3,
                      0, 1, 3, 2,
                      0, 2, 1, 3,
                      0, 2, 3, 1,
                      0, 3, 1, 2,
                      0, 3, 2, 1};

byte fourByFive[] = I 1/2, 3, 4, 

1, 2, 3, 0, 
1, 2, 4, 3, 
1, 2, 4, 0, 
1, 2, 0, 3, 
1, 2, 0, 4, 
1, 3, 2, 4, 
1, 3, 2, 0, 
1, 3, 4, 2, 
1, 3, 4, 0, 
1, 3, 0, 2, 
1, 3, 0, 4, 
1, 4, 2, 3, 

1, 4, 2, 0, 

1, 4, 3, 2, 

1, 4, 3, 0, 

1, 4, 0, 2, 

1, 4, 0, 3, 

1, 0, 2, 3, 

1, 0, 2, 4, 



1, 0, 3, 2, 
1, 0, 3, 4, 
1, 0, 4, 2, 

1, 0, 4, 3, 

2, 1, 3, 4, 
2, 1, 3, 0, 
2, 1, 4, 3, 
2, 1, 4, 0, 
2, 1, 0, 3, 
2, 1, 0, 4, 
2, 3, 1, 4, 
2, 3, 1, 0, 
2, 3, 4, 1, 
2, 3, 4, 0, 
2, 3, 0, 1, 
2, 3, 0, 4, 
2, 4, 1, 3, 
2, 4, 1, 0, 
2, 4, 3, 1, 
2, 4, 3, 0, 
2, 4, 0, 1, 
2, 4, 0, 3, 
2, 0, 1, 3, 
2, 0, 1, 4, 
2, 0, 3, 1, 
2, 0, 3, 4, 
2, 0, 4, 1, 

2, 0, 4, 3, 

3, 2, 1, 4, 
3, 2, 1, 0, 
3, 2, 4, 1, 



3, 2, 4, 0, 
3, 2, 0, 1, 
3, 2, 0, 4, 
3, 1, 2, 4, 
3, 1,' 2, 0, 
3, 1, 4, 2, 
3, 1, 4, 0, 
3, 1, 0, 2, 
3, 1, 0, 4, 
3, 4, 2, 1, 
3, 4, 2, 0, 
3, 4, 1, 2, 
3, 4, 1, 0, 
3, 4, 0, 2, 
3, 4, 0, 1, 
3, 0, 2, 1, 
3, 0, 2, 4, 
3, 0, 1, 2, 
3, 0, 1, 4, 
3, 0, 4, 2, 

3, 0, 4, 1, 

4, 2, 3, 1, 
4, 2, 3, 0, 
4, 2, 1, 3, 
4, 2, 1, 0, 
4, 2, 0, 3, 
4, 2, 0, 1, 
4, 3, 2, 1, 
4, 3, 2, 0, 
4, 3, 1, 2, 



/* Generated by VariantManager */
/* NOTE(review): the ANNIE entry was restored from a garbled listing; string
   contents elsewhere (including "E" vs "E ") are kept exactly as listed. */
addVariant("ANN","ANITA",0.85,"E ");
addVariant("ANN","ANA",0.85,"E ");
addVariant("ANN","ANNIE",0.90,"E ");
addVariant("ANN","ANNA",0.85,"E ");
addVariant("ANN","ANNE",0.95,"E");
addVariant("ANN","ANNETTE",0.85,"E ");



/* Generated by VariantManager */
/* NOTE(review): quotes and commas restored from a garbled listing; verify
   the last argument of each call against the generator output. */
addVariant("SON","SWUN",0.95,"C ");
addVariant("SON","SHON",0.95,"K");
addVariant("SON","SOHN",0.95,"K");



/* Generated by TAQManager */
/* NOTE(review): the argument list below is garbled in this listing (quote
   and comma characters are scrambled) and cannot be reconstructed with
   confidence; restore it from the original TAQManager output. */
addTAQValue( M SENORITA",'T , , , N , , , X , , , XV , G "); 



